{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T20:52:49Z","timestamp":1768337569665,"version":"3.49.0"},"reference-count":55,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2009,8,1]],"date-time":"2009-08-01T00:00:00Z","timestamp":1249084800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Auton Robot"],"published-print":{"date-parts":[[2009,8]]},"DOI":"10.1007\/s10514-009-9130-2","type":"journal-article","created":{"date-parts":[[2009,8,3]],"date-time":"2009-08-03T17:43:33Z","timestamp":1249321413000},"page":"93-103","source":"Crossref","is-referenced-by-count":160,"title":["A Bayesian exploration-exploitation approach for optimal online sensing and planning with a visually guided mobile robot"],"prefix":"10.1007","volume":"27","author":[{"given":"Ruben","family":"Martinez-Cantin","sequence":"first","affiliation":[]},{"given":"Nando","family":"de Freitas","sequence":"additional","affiliation":[]},{"given":"Eric","family":"Brochu","sequence":"additional","affiliation":[]},{"given":"Jos\u00e9","family":"Castellanos","sequence":"additional","affiliation":[]},{"given":"Arnaud","family":"Doucet","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2009,8,4]]},"reference":[{"key":"9130_CR1","doi-asserted-by":"crossref","unstructured":"Bailey, T., Nieto, J., Guivant, J., Stevens, M., & Nebot, E. (2006). Consistency of the EKF-SLAM algorithm. In Proc. of the IEEE\/RSJ int. conf. on intelligent robots and systems, 2006.","DOI":"10.1109\/IROS.2006.281644"},{"issue":"4","key":"9130_CR2","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1613\/jair.806","volume":"15","author":"J. Baxter","year":"2001","unstructured":"Baxter, J., & Bartlett, P. L. (2001). Infinite-horizon policy-gradient estimation. Journal of Artificial Intelligence Research, 15(4), 319\u2013350.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"9130_CR3","unstructured":"Bergman, N. (1999). Recursive Bayesian estimation: navigation and tracking applications. PhD thesis, Link\u00f6ping University."},{"key":"9130_CR4","volume-title":"Dynamic programming and optimal control","author":"D. Bertsekas","year":"1995","unstructured":"Bertsekas, D. (1995). Dynamic programming and optimal control. Nashua: Athena Scientific."},{"key":"9130_CR5","unstructured":"Brochu, E., de Freitas, N., & Ghosh, A. (2007). Active preference learning with discrete choice data. In Advances in neural information processing systems, 2007."},{"issue":"1","key":"9130_CR6","doi-asserted-by":"crossref","first-page":"261","DOI":"10.1109\/TAES.2008.4517003","volume":"44","author":"M. Bryson","year":"2008","unstructured":"Bryson, M., & Sukkarieh, S. (2008). Observability analysis and active control for airborne SLAM. IEEE Transaction on Aerospace Electronic Systems, 44(1), 261\u2013280.","journal-title":"IEEE Transaction on Aerospace Electronic Systems"},{"key":"9130_CR7","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1214\/ss\/1177009939","volume":"10","author":"K. Chaloner","year":"1995","unstructured":"Chaloner, K., & Verdinelli, I. (1995). Bayesian experimental design: a review. Journal of Statistical Science, 10, 273\u2013304.","journal-title":"Journal of Statistical Science"},{"key":"9130_CR8","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1109\/MRA.2006.1638022","volume":"13","author":"H. Durrant-Whyte","year":"2006","unstructured":"Durrant-Whyte, H., & Bailey, T. (2006). Simultaneous localisation and mapping (SLAM): part I the essential algorithms. Robotics and Automation Magazine, 13, 99\u2013110.","journal-title":"Robotics and Automation Magazine"},{"key":"9130_CR9","unstructured":"Finkel, D. (2003). DIRECT optimization algorithm user guide. Center for Research in Scientific Computation, North Carolina State University."},{"key":"9130_CR10","unstructured":"Gablonsky, J. (2001). Modification of the DIRECT algorithm. PhD thesis, Department of Mathematics, North Carolina State University, Raleigh, North Carolina."},{"key":"9130_CR11","unstructured":"Hernandez, M. (2004). Optimal sensor trajectories in bearings-only tracking. In P.\u00a0Svensson & J.\u00a0Schubert (Eds.), Proc. of the seventh int. conf. on information fusion, international society of information fusion, Mountain View, CA (Vol. II, pp. 893\u2013900)."},{"issue":"2","key":"9130_CR12","doi-asserted-by":"crossref","first-page":"399","DOI":"10.1109\/TAES.2004.1309993","volume":"40","author":"M. Hernandez","year":"2004","unstructured":"Hernandez, M., Kirubarajan, T., & Bar-Shalom, Y. (2004). Multisensor resource deployment using posterior Cram\u00e8r-Rao bounds. IEEE Transactions on Aerospace Electronic Systems, 40(2), 399\u2013416.","journal-title":"IEEE Transactions on Aerospace Electronic Systems"},{"key":"9130_CR13","doi-asserted-by":"crossref","unstructured":"Howard, M., Klanke, S., Gienger, M., Goerick, C., & Vijayakumar, S. (2009). A novel method for learning policies from variable constraint data. Autonomous Robots, 27 (Special issue on Robot Learning, Part B) (this issue).","DOI":"10.1007\/s10514-009-9129-8"},{"key":"9130_CR14","doi-asserted-by":"crossref","first-page":"345","DOI":"10.1023\/A:1012771025575","volume":"21","author":"D. Jones","year":"2001","unstructured":"Jones, D. (2001). A\u00a0taxonomy of global optimization methods based on response surfaces. Journal of Global Optimization, 21, 345\u2013383.","journal-title":"Journal of Global Optimization"},{"issue":"1","key":"9130_CR15","doi-asserted-by":"crossref","first-page":"157","DOI":"10.1007\/BF00941892","volume":"79","author":"D. Jones","year":"1993","unstructured":"Jones, D., Perttunen, C., & Stuckman, B. (1993). Lipschitzian optimization without the Lipschitz constant. Journal of Optimization Theory and Applications, 79(1), 157\u2013181.","journal-title":"Journal of Optimization Theory and Applications"},{"issue":"4","key":"9130_CR16","doi-asserted-by":"crossref","first-page":"455","DOI":"10.1023\/A:1008306431147","volume":"13","author":"D. Jones","year":"1998","unstructured":"Jones, D., Schonlau, M., & Welch, W. (1998). Efficient global optimization of expensive black-box functions. Journal of Global Optimization, 13(4), 455\u2013492.","journal-title":"Journal of Global Optimization"},{"key":"9130_CR17","unstructured":"Kato, H., & Billinghurst, M. (1999). Marker tracking and hmd calibration for a video-based augmentedreality conferencing system. In Proc. of the 2nd IEEE and ACM int. work. on augmented reality (pp.\u00a085\u201394) 1999."},{"issue":"2","key":"9130_CR18","doi-asserted-by":"crossref","first-page":"175","DOI":"10.1177\/0278364907087426","volume":"27","author":"T. Kollar","year":"2008","unstructured":"Kollar, T., & Roy, N. (2008). Trajectory optimization using reinforcement learning for map exploration. International Journal of Robotics Research, 27(2), 175\u2013197.","journal-title":"International Journal of Robotics Research"},{"issue":"4","key":"9130_CR19","doi-asserted-by":"crossref","first-page":"1143","DOI":"10.1137\/S0363012901385691","volume":"42","author":"V. Konda","year":"2003","unstructured":"Konda, V., & Tsitsiklis, J. (2003). On actor-critic algorithms. SIAM Journal on Control and Optimization, 42(4), 1143\u20131166.","journal-title":"SIAM Journal on Control and Optimization"},{"key":"9130_CR20","doi-asserted-by":"crossref","unstructured":"Kueck, H., de Freitas, N., & Doucet, A. (2006). SMC samplers for Bayesian optimal nonlinear design. In Nonlinear statistical signal processing workshop (NSSPW), 2006.","DOI":"10.1109\/NSSPW.2006.4378829"},{"key":"9130_CR21","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1115\/1.3653121","volume":"86","author":"H. Kushner","year":"1964","unstructured":"Kushner, H. (1964). A new method of locating the maximum of an arbitrary multipeak curve in the presence of noise. Journal of Basic Engineering, 86, 97\u2013106.","journal-title":"Journal of Basic Engineering"},{"key":"9130_CR22","doi-asserted-by":"crossref","unstructured":"Leung, C., Huang, S., Dissanayake, G., & Forukawa, T. (2005). Trajectory planning for multiple robots in bearing-only target localisation. In Proc. of the IEEE\/RSJ int. conf. on intelligent robots and systems, 2005.","DOI":"10.1109\/IROS.2005.1545322"},{"key":"9130_CR23","unstructured":"Lizotte, D. (2008). Practical Bayesian optimization. PhD thesis, Dept. of Computer Science, University of Alberta."},{"key":"9130_CR24","unstructured":"Lizotte, D., Wang, T., Bowling, M., & Schuurmans, D. (2007). Automatic gait optimization with Gaussian process regression. In International joint conference on artificial intelligence, 2007."},{"key":"9130_CR25","doi-asserted-by":"crossref","first-page":"57","DOI":"10.1023\/A:1008294716304","volume":"10","author":"M. Locatelli","year":"1997","unstructured":"Locatelli, M. (1997). Bayesian algorithms for one-dimensional global optimization. Journal of Global Optimization, 10, 57\u201376.","journal-title":"Journal of Global Optimization"},{"key":"9130_CR26","volume-title":"Predictive control: with constraints","author":"J. Maciejowski","year":"2002","unstructured":"Maciejowski, J. (2002). Predictive control: with constraints. New York: Prentice-Hall."},{"key":"9130_CR27","unstructured":"Martinez-Cantin, R. (2008). Active map learning for robots: insights into statistical consistency. PhD thesis, University of Zaragoza."},{"key":"9130_CR28","unstructured":"Martinez-Cantin, R., de Freitas, N., & Castellanos, J. (2006). Analysis of particle methods for simultaneous robot localization and mapping and a new algorithm: Marginal-SLAM. In Proc. of the IEEE int. conf. on robotics & automation, 2006."},{"key":"9130_CR29","doi-asserted-by":"crossref","unstructured":"Martinez-Cantin, R., de Freitas, N., & Castellanos, J. (2007a). Active policy learning for robot planning and exploration under uncertainty. In Proc.\u00a0of robotics: science and systems, 2007.","DOI":"10.15607\/RSS.2007.III.041"},{"key":"9130_CR30","doi-asserted-by":"crossref","unstructured":"Martinez-Cantin, R., de Freitas, N., Doucet, A., & Castellanos, J. (2007b). Active policy learning for robot planning and exploration under uncertainty. In Robotics: science and systems (RSS), 2007.","DOI":"10.15607\/RSS.2007.III.041"},{"key":"9130_CR31","doi-asserted-by":"crossref","unstructured":"Meger, D., Marinakis, D., Rekleitis, I., & Dudek, G. (2009). Inferring a probability distribution function for the pose of a sensor network using a mobile robot. In: ICRA, 2009.","DOI":"10.1109\/ROBOT.2009.5152800"},{"issue":"1","key":"9130_CR32","first-page":"140","volume":"3","author":"G. Metta","year":"2006","unstructured":"Metta, G., Fitzpatrick, P., & Natale, L. (2006). Yarp: yet another robot platform. International Journal on Advanced Robotics Systems, 3(1), 140\u2013151.","journal-title":"International Journal on Advanced Robotics Systems"},{"key":"9130_CR33","first-page":"117","volume-title":"Towards global optimisation","author":"J. Mockus","year":"1978","unstructured":"Mockus, J., Tiesis, V., & Zilinskas, A. (1978). The application of Bayesian methods for seeking the extremum. In L.\u00a0Dixon & G.\u00a0Szego (Eds.), Towards global optimisation (Vol.\u00a02, pp.\u00a0117\u2013129). Amsterdam: Elsevier."},{"key":"9130_CR34","unstructured":"Ng, A., & Jordan, M. (2000). PEGASUS: a policy search method for large MDPs and POMDPs. In Proc. of the sixteenth conf. on uncertainty in artificial intelligence, 2000."},{"key":"9130_CR35","unstructured":"Paris, S., & Le Cadre, J. (2002). Planification for terrain-aided navigation. In Fusion 2002, Annapolis, Maryland (pp.\u00a01007\u20131014)."},{"key":"9130_CR36","doi-asserted-by":"crossref","unstructured":"Peters, J., & Schaal, S. (2006). Policy gradient methods for robotics. In Proc. of the IEEE\/RSJ int. conf. on intelligent robots and systems, 2006.","DOI":"10.1109\/IROS.2006.282564"},{"issue":"7\u20139","key":"9130_CR37","doi-asserted-by":"crossref","first-page":"1180","DOI":"10.1016\/j.neucom.2007.11.026","volume":"71","author":"J. Peters","year":"2008","unstructured":"Peters, J., & Schaal, S. (2008a). Natural actor critic. Neurocomputing, 71(7\u20139), 1180\u20131190.","journal-title":"Neurocomputing"},{"issue":"4","key":"9130_CR38","doi-asserted-by":"crossref","first-page":"682","DOI":"10.1016\/j.neunet.2008.02.003","volume":"21","author":"J. Peters","year":"2008","unstructured":"Peters, J., & Schaal, S. (2008b). Reinforcement learning of motor skills with policy gradients. Neural Networks, 21(4), 682\u2013697.","journal-title":"Neural Networks"},{"key":"9130_CR39","volume-title":"Gaussian processes for machine learning","author":"C. Rasmussen","year":"2006","unstructured":"Rasmussen, C., & Williams, C. (2006). Gaussian processes for machine learning. Cambridge: The MIT Press."},{"issue":"1","key":"9130_CR40","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1007\/s10514-009-9120-4","volume":"27","author":"M. Riedmiller","year":"2009","unstructured":"Riedmiller, M., Gabel, T., Hafner, R., & Lange, S. (2009). Reinforcement learning for robot soccer. Autonomous Robots, 27(1), 55\u201373 (Special issue on Robot Learning, Part\u00a0A).","journal-title":"Autonomous Robots"},{"key":"9130_CR41","unstructured":"Sasena, M. (2002). Flexibility and efficiency enhancement for constrained global design optimization with Kriging approximations. PhD thesis, University of Michigan."},{"key":"9130_CR42","doi-asserted-by":"crossref","unstructured":"Schonlau, M., Welch, W., & Jones, D. (1998). Global versus local search in constrained optimization of computer models. In N.\u00a0Flournoy, W.\u00a0Rosenberger, W.\u00a0Wong (Eds.) New developments and applications in experimental design (Vol.\u00a034, pp.\u00a011\u201325). Institute of Mathematical Statistics.","DOI":"10.1214\/lnms\/1215456182"},{"key":"9130_CR43","doi-asserted-by":"crossref","unstructured":"Sim, R., & Roy, N. (2005). Global A-optimal robot exploration in SLAM. In Proc. of the IEEE int. conf. on robotics & automation, 2005.","DOI":"10.1109\/ROBOT.2005.1570193"},{"key":"9130_CR44","unstructured":"Singh, A., Krause, A., Guestrin, C., Kaiser, W., & Batalin, M. (2007). Efficient planning of informative paths for multiple robots. In Proc. of the int. joint conf. on artificial intelligence, 2007."},{"key":"9130_CR45","doi-asserted-by":"crossref","first-page":"707","DOI":"10.1613\/jair.2674","volume":"34","author":"A. Singh","year":"2009","unstructured":"Singh, A., Krause, A., Guestrin, C., & Kaiser, W. (2009). Efficient informative sensing using multiple robots. Journal of Artificial Intelligence Research (JAIR), 34, 707\u2013755.","journal-title":"Journal of Artificial Intelligence Research (JAIR)"},{"key":"9130_CR46","doi-asserted-by":"crossref","unstructured":"Singh, S., Kantas, N., Doucet, A., Vo, B., & Evans, R. (2005). Simulation-based optimal sensor scheduling with application to observer trajectory planning. In Proc. of the IEEE conf. on decision and control and eur. control conference (pp.\u00a07296\u20137301) 2005.","DOI":"10.1109\/CDC.2005.1583338"},{"key":"9130_CR47","doi-asserted-by":"crossref","first-page":"1071","DOI":"10.1287\/opre.21.5.1071","volume":"21","author":"R. Smallwood","year":"1973","unstructured":"Smallwood, R., & Sondik, E. (1973). The optimal control of partially observable Markov processes over a finite horizon. Operations Research, 21, 1071\u20131088.","journal-title":"Operations Research"},{"key":"9130_CR48","doi-asserted-by":"crossref","unstructured":"Stachniss, C., Grisetti, G., & Burgard, W. (2005). Information gain-based exploration using Rao-Blackwellized particle filters. In Proc.\u00a0of robotics: science and systems, Cambridge, USA, 2005.","DOI":"10.15607\/RSS.2005.I.009"},{"key":"9130_CR49","unstructured":"Stolle, M., & Atkeson, C. (2009). Finding and transferring policies using stored behaviors. Autonomous Robots, 27 (Special issue on Robot Learning, Part B) (this issue)."},{"issue":"1","key":"9130_CR50","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1049\/ip-rsn:19990262","volume":"146","author":"O. Tremois","year":"1999","unstructured":"Tremois, O., & Le Cadre, J. (1999). Optimal observer trajectory in bearings-only tracking for manoeuvering sources. IEE Proceeding Radar, Sonar Navigation, 146(1), 31\u201339.","journal-title":"IEE Proceeding Radar, Sonar Navigation"},{"key":"9130_CR51","unstructured":"Vazquez, E., & Bect, J. (2008). On the convergence of the expected improvement algorithm. arXivorg arXiv:0712.3744v2 [stat.CO], http:\/\/arxiv.org\/abs\/0712.3744v2 ."},{"key":"9130_CR52","doi-asserted-by":"crossref","unstructured":"Vidal-Calleja, T., Davison, A., Andrade-Cetto, J., & Murray, D. (2006). Active control for single camera SLAM. In Proc. of the IEEE int. conf. on robotics & automation (pp.\u00a01930\u20131936) 2006.","DOI":"10.1109\/ROBOT.2006.1641988"},{"key":"9130_CR53","unstructured":"Vlassis, N., Toussaint, G. K. M., & Piperidis, S. (2009). Learning model-free robot control using a Monte Carlo em algorithm. Autonomous Robots, 27 (Special issue on Robot Learning, Part\u00a0B) (this issue)."},{"issue":"3","key":"9130_CR54","first-page":"229","volume":"8","author":"R. Williams","year":"1992","unstructured":"Williams, R. (1992). Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine Learning, 8(3), 229\u2013256.","journal-title":"Machine Learning"},{"key":"9130_CR55","doi-asserted-by":"crossref","first-page":"957","DOI":"10.1016\/S0898-1221(02)00206-7","volume":"44","author":"A. Zilinskas","year":"2002","unstructured":"Zilinskas, A., & Zilinskas, J. (2002). Global optimization based on a statistical model and simplicial partitioning. Computers and Mathematics with Applications, 44, 957\u2013967.","journal-title":"Computers and Mathematics with Applications"}],"container-title":["Autonomous Robots"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10514-009-9130-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10514-009-9130-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10514-009-9130-2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,29]],"date-time":"2019-05-29T19:13:32Z","timestamp":1559157212000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10514-009-9130-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,8]]},"references-count":55,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2009,8]]}},"alternative-id":["9130"],"URL":"https:\/\/doi.org\/10.1007\/s10514-009-9130-2","relation":{},"ISSN":["0929-5593","1573-7527"],"issn-type":[{"value":"0929-5593","type":"print"},{"value":"1573-7527","type":"electronic"}],"subject":[],"published":{"date-parts":[[2009,8]]}}}