{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,14]],"date-time":"2026-07-14T10:37:23Z","timestamp":1784025443543,"version":"3.55.0"},"reference-count":64,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,29]]},"DOI":"10.1109\/icra48891.2023.10160473","type":"proceedings-article","created":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T17:20:56Z","timestamp":1688491256000},"page":"5645-5652","source":"Crossref","is-referenced-by-count":8,"title":["A Contextual Bandit Approach for Learning to Plan in Environments with Probabilistic Goal Configurations"],"prefix":"10.1109","author":[{"given":"Sohan","family":"Rudra","sequence":"first","affiliation":[{"name":"Google"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Saksham","family":"Goel","sequence":"additional","affiliation":[{"name":"Google"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Anirban","family":"Santara","sequence":"additional","affiliation":[{"name":"Google"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Claudio","family":"Gentile","sequence":"additional","affiliation":[{"name":"Google"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Laurent","family":"Perron","sequence":"additional","affiliation":[{"name":"Google"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fei","family":"Xia","sequence":"additional","affiliation":[{"name":"Google"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Vikas","family":"Sindhwani","sequence":"additional","affiliation":[{"name":"Google"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Carolina","family":"Parada","sequence":"additional","affiliation":[{"name":"Google"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Gaurav","family":"Aggarwal","sequence":"additional","affiliation":[{"name":"Google"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref13","article-title":"Learning object-conditioned exploration using distributed soft actor critic","volume":"abs 2007 14545","author":"wahid","year":"2020","journal-title":"CoRR"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098043"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01832"},{"key":"ref56","article-title":"Attention is all you need","volume":"30","author":"vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2021.3064065"},{"key":"ref59","article-title":"Open-vocabulary object detection via vision and language knowledge distillation","author":"gu","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref14","article-title":"Learning to map for active semantic goal navigation","author":"georgakis","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref58","article-title":"Objectnav revisited: On evaluation of embodied agents navigating to objects","volume":"abs 2006 13171","author":"batra","year":"2020","journal-title":"CoRR"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2017.00081"},{"key":"ref52","author":"perron","year":"0","journal-title":"Or-tools"},{"key":"ref11","article-title":"Learning exploration policies for navigation","author":"chen","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref55","author":"camacho","year":"2013","journal-title":"Model Predictive Control"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793493"},{"key":"ref54","author":"coumans","year":"2016","journal-title":"Pybullet a python module for physics simulation for games robotics and machine learning"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"30","DOI":"10.1109\/37.236323","article-title":"Mobile robot navigation using neural networks and nonmetrical environmental models","volume":"13","author":"meng","year":"1993","journal-title":"IEEE Control Systems Magazine"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.1993.291944"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1023\/A:1008824626321"},{"key":"ref18","first-page":"602","article-title":"Fuzzy-nav: A vision-based robot navigation architecture using fuzzy inference for uncertainty-reasoning","author":"pan","year":"1995","journal-title":"World Congress on Neural Networks"},{"key":"ref51","first-page":"11492","article-title":"Neural contextual bandits with ucb-based exploration","author":"zhou","year":"2020","journal-title":"International Conference on Machine Learning"},{"key":"ref50","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/0005-1098(89)90002-2"},{"key":"ref45","article-title":"On evaluation of embodied navigation agents","author":"anderson","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2013.6580446"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TCST.2009.2017934"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/S0020-0190(00)00102-2"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-61440-0_135"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/SFCS.2003.1238179"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-47867-1_17"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC48978.2021.9564767"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2003.1248826"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-008-9235-4"},{"key":"ref9","article-title":"Visual-based obstacle detection: a purposive approach using the normal ow","author":"santos-victor","year":"1995","journal-title":"Proc of the International Conference on Intelligent Autonomous Systems"},{"key":"ref4","article-title":"Do as i can, not as i say: Grounding language in robotic affordances","author":"ahn","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref3","article-title":"From seeing to moving: A survey on learning for visual indoor navigation (vin)","author":"ye","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00387"},{"key":"ref5","first-page":"1","article-title":"Embodied question answering","author":"das","year":"0","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/BF02097807"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00943"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00945"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/195058.195125"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-007-5016-8"},{"key":"ref31","author":"wei","year":"2018","journal-title":"New methods for solving the minimum weighted latency problem"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772758"},{"key":"ref33","article-title":"Ai2-thor: An interactive 3d environment for visual ai","volume":"abs 1712 5474","author":"kolve","year":"2017","journal-title":"ArXiv"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2017.00081"},{"key":"ref2","article-title":"Object goal navigation using goal-oriented semantic exploration","author":"chaplot","year":"0","journal-title":"Neural Information Processing Systems (NeurIPS)"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-27833-7_1"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1002\/net.10031"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1051\/ita\/1986200100791"},{"key":"ref24","article-title":"Neural topological slam for visual navigation","author":"chaplot","year":"0","journal-title":"CVPR"},{"key":"ref23","article-title":"Learning to explore using active neural slam","author":"chaplot","year":"0","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref26","article-title":"On the sample complexity of end-to-end training vs. semantic abstraction training","author":"shalev-shwartz","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref25","article-title":"Scene memory transformer for embodied agents in long time horizon tasks","author":"fang","year":"0","journal-title":"CVPR 2019"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/70.88137"},{"key":"ref64","article-title":"Improved algorithms for linear stochastic bandits","author":"abbasi-yadkori","year":"0","journal-title":"NIPS"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1137\/S0097539703432542"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/34.982903"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/21.44033"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/83.623193"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICIINFS.2011.6038117"},{"key":"ref29","article-title":"Pointnet++: Deep hierarchical feature learning on point sets in a metric space","volume":"30","author":"qi","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1057\/jors.1975.151"},{"key":"ref62","first-page":"1151","article-title":"On multilabel classification and ranking with partial feedback","volume":"25","author":"gentile","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref61","first-page":"767","article-title":"Learning to plan variable length sequences of actions with a cascading bandit click model of user feedback","volume":"151","author":"santara","year":"0","journal-title":"Proceedings of The 25th International Conference on Artificial Intelligence and Statistics"}],"event":{"name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","location":"London, United Kingdom","start":{"date-parts":[[2023,5,29]]},"end":{"date-parts":[[2023,6,2]]}},"container-title":["2023 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10160473.pdf?arnumber=10160473","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,24]],"date-time":"2023-07-24T17:29:32Z","timestamp":1690219772000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10160473\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,29]]},"references-count":64,"URL":"https:\/\/doi.org\/10.1109\/icra48891.2023.10160473","relation":{},"subject":[],"published":{"date-parts":[[2023,5,29]]}}}