{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T12:08:06Z","timestamp":1777637286389,"version":"3.51.4"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2013,2,22]],"date-time":"2013-02-22T00:00:00Z","timestamp":1361491200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2013,9]]},"DOI":"10.1007\/s10489-012-0416-2","type":"journal-article","created":{"date-parts":[[2013,2,21]],"date-time":"2013-02-21T06:09:28Z","timestamp":1361426968000},"page":"345-353","source":"Crossref","is-referenced-by-count":13,"title":["Monte-Carlo tree search for Bayesian reinforcement learning"],"prefix":"10.1007","volume":"39","author":[{"given":"Ngo Anh","family":"Vien","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wolfgang","family":"Ertel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Viet-Hung","family":"Dang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"TaeChoong","family":"Chung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2013,2,22]]},"reference":[{"key":"416_CR1","volume-title":"Proceedings of the 25th conference on uncertainty in artificial intelligence (UAI-09)","author":"J Asmuth","year":"2009","unstructured":"Asmuth J, Li L, Littman ML, Nouri A, Wingate D (2009) A Bayesian sampling approach to exploration in reinforcement learning. In: Proceedings of the 25th conference on uncertainty in artificial intelligence (UAI-09)"},{"key":"416_CR2","first-page":"19","volume-title":"Proceedings of the twenty-seventh conference on uncertainty in artificial intelligence","author":"J Asmuth","year":"2011","unstructured":"Asmuth J, Littman ML (2011) Learning is planning: near Bayes-optimal reinforcement learning via Monte-Carlo tree search. In: Proceedings of the twenty-seventh conference on uncertainty in artificial intelligence, pp 19\u201326"},{"issue":"2\u20133","key":"416_CR3","doi-asserted-by":"crossref","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P Auer","year":"2002","unstructured":"Auer P, Cesa-Bianchi N, Fischer P (2002) Finite-time analysis of the multiarmed bandit problem. Mach Learn 47(2\u20133):235\u2013256","journal-title":"Mach Learn"},{"issue":"3","key":"416_CR4","doi-asserted-by":"crossref","first-page":"243","DOI":"10.1023\/A:1007634325138","volume":"40","author":"J Baxter","year":"2000","unstructured":"Baxter J, Tridgell A, Weaver L (2000) Learning to play chess using temporal differences. Mach Learn 40(3):243\u2013263","journal-title":"Mach Learn"},{"key":"416_CR5","first-page":"213","volume":"3","author":"RI Brafman","year":"2002","unstructured":"Brafman RI, Tennenholtz M (2002) R-max\u2014a general polynomial time algorithm for near-optimal reinforcement learning. J Mach Learn Res 3:213\u2013231","journal-title":"J Mach Learn Res"},{"key":"416_CR6","series-title":"Proceedings of the 20th international joint conference on artificial intelligence","first-page":"2437","volume-title":"IJCAI 2007","author":"PS Castro","year":"2007","unstructured":"Castro PS, Precup D (2007) Using linear programming for Bayesian exploration in Markov decision processes. In: IJCAI 2007. Proceedings of the 20th international joint conference on artificial intelligence, Hyderabad, India, January 6\u201312, 2007, pp\u00a02437\u20132442"},{"key":"416_CR7","first-page":"761","volume-title":"Proceedings of the fifteenth national conference on artificial intelligence and tenth innovative applications of artificial intelligence conference, AAAI\/IAAI 98","author":"R Dearden","year":"1998","unstructured":"Dearden R, Friedman N, Russell SJ (1998) Bayesian Q-learning. In: Proceedings of the fifteenth national conference on artificial intelligence and tenth innovative applications of artificial intelligence conference, AAAI\/IAAI 98, Madison, WI, USA, July 26\u201330, 1998, pp 761\u2013768"},{"key":"416_CR8","unstructured":"Duff M (2002) Optimal learning: computational procedures for Bayes-adaptive Markov decision processes. PhD thesis, University of Massassachusetts Amherst"},{"key":"416_CR9","first-page":"154","volume-title":"International conference on machine learning (ICML)","author":"Y Engel","year":"2003","unstructured":"Engel Y, Mannor S, Meir R (2003) Bayes meets bellman: the Gaussian process approach to temporal difference learning. In: International conference on machine learning (ICML), pp 154\u2013161"},{"key":"416_CR10","doi-asserted-by":"crossref","first-page":"201","DOI":"10.1145\/1102351.1102377","volume-title":"International conference on machine learning (ICML)","author":"Y Engel","year":"2005","unstructured":"Engel Y, Mannor S, Meir R (2005) Reinforcement learning with Gaussian processes. In: International conference on machine learning (ICML), pp 201\u2013208"},{"key":"416_CR11","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1145\/1273496.1273531","volume-title":"International conference on machine learning (ICML)","author":"S Gelly","year":"2007","unstructured":"Gelly S, Silver D (2007) Combining online and offline knowledge in uct. In: International conference on machine learning (ICML), pp 273\u2013280"},{"key":"416_CR12","first-page":"457","volume-title":"Advances in neural information processing (NIPS)","author":"M Ghavamzadeh","year":"2006","unstructured":"Ghavamzadeh M, Engel Y (2006) Bayesian policy gradient algorithms. In: Advances in neural information processing (NIPS), pp\u00a0457\u2013464"},{"key":"416_CR13","doi-asserted-by":"crossref","first-page":"297","DOI":"10.1145\/1273496.1273534","volume-title":"International conference on machine learning (ICML)","author":"M Ghavamzadeh","year":"2007","unstructured":"Ghavamzadeh M, Engel Y (2007) Bayesian actor-critic algorithms. In: International conference on machine learning (ICML), pp 297\u2013304"},{"key":"416_CR14","doi-asserted-by":"crossref","unstructured":"Granmo OC, Glimsdal S (2012) Accelerated Bayesian learning for decentralized two-armed bandit based decision making with applications to the goore game. Appl Intell","DOI":"10.1007\/s10489-012-0346-z"},{"issue":"1","key":"416_CR15","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1023\/B:APIN.0000011143.95085.74","volume":"20","author":"J Hong","year":"2004","unstructured":"Hong J, Prabhu VV (2004) Distributed reinforcement learning control for batch sequencing and sizing in just-in-time manufacturing systems. Appl Intell 20(1):71\u201387","journal-title":"Appl Intell"},{"key":"416_CR16","volume-title":"Advances in neural information processing (NIPS)","author":"D Hsu","year":"2007","unstructured":"Hsu D, Lee WS, Rong N (2007) What makes some POMDP problems easy to approximate? In: Advances in neural information processing (NIPS)"},{"issue":"1","key":"416_CR17","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1007\/s10489-008-0115-1","volume":"31","author":"A Iglesias","year":"2009","unstructured":"Iglesias A, Mart\u00ednez P, Aler R, Fern\u00e1ndez F (2009) Learning teaching strategies in an adaptive and intelligent educational system through reinforcement learning. Appl Intell 31(1):89\u2013106","journal-title":"Appl Intell"},{"key":"416_CR18","first-page":"306","volume-title":"International conference on machine learning (ICML)","author":"S Kakade","year":"2003","unstructured":"Kakade S, Kearns MJ, Langford J (2003) Exploration in metric state spaces. In: International conference on machine learning (ICML), pp 306\u2013312"},{"issue":"2\u20133","key":"416_CR19","doi-asserted-by":"crossref","first-page":"209","DOI":"10.1023\/A:1017984413808","volume":"49","author":"MJ Kearns","year":"2002","unstructured":"Kearns MJ, Singh SP (2002) Near-optimal reinforcement learning in polynomial time. Mach Learn 49(2\u20133):209\u2013232","journal-title":"Mach Learn"},{"key":"416_CR20","first-page":"282","volume-title":"European conference on machine learning (ECML)","author":"L Kocsis","year":"2006","unstructured":"Kocsis L, Szepesv\u00e1ri C (2006) Bandit based Monte-Carlo planning. In: European conference on machine learning (ECML), pp\u00a0282\u2013293"},{"key":"416_CR21","first-page":"65","volume-title":"International conference on machine learning (ICML)","author":"JZ Kolter","year":"2009","unstructured":"Kolter JZ, Ng AY (2009) Near-Bayesian exploration in polynomial time. In: International conference on machine learning (ICML), p 65"},{"issue":"2","key":"416_CR22","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/s10489-009-0191-x","volume":"34","author":"J Li","year":"2011","unstructured":"Li J, Li Z, Chen J (2011) Microassembly path planning using reinforcement learning for improving positioning accuracy of a 1 cm3 omni-directional mobile microrobot. Appl Intell 34(2):211\u2013225","journal-title":"Appl Intell"},{"key":"416_CR23","doi-asserted-by":"crossref","unstructured":"Pakizeh E, Palhang M, Pedram MM (2012) Multi-criteria expertness based cooperative Q-learning. Appl Intell","DOI":"10.1007\/s10489-012-0392-6"},{"key":"416_CR24","doi-asserted-by":"crossref","first-page":"697","DOI":"10.1145\/1143844.1143932","volume-title":"International conference on machine learning (ICML)","author":"P Poupart","year":"2006","unstructured":"Poupart P, Vlassis NA, Hoey J, Regan K (2006) An analytic solution to discrete Bayesian reinforcement learning. In: International conference on machine learning (ICML), pp 697\u2013704"},{"key":"416_CR25","volume-title":"Advances in neural information processing (NIPS)","author":"S Ross","year":"2007","unstructured":"Ross S, Chaib-draa B, Pineau J (2007) Bayes-adaptive POMDPs. In: Advances in neural information processing (NIPS)"},{"key":"416_CR26","first-page":"476","volume-title":"Proceedings of the 24th conference in uncertainty in artificial intelligence","author":"S Ross","year":"2008","unstructured":"Ross S, Pineau J (2008) Model-based Bayesian reinforcement learning in large structured domains. In: Proceedings of the 24th conference in uncertainty in artificial intelligence, pp 476\u2013483"},{"key":"416_CR27","volume-title":"Artificial intelligence: a modern approach","author":"SJ Russell","year":"2003","unstructured":"Russell SJ, Norvig P (2003) Artificial intelligence: a modern approach, 2nd edn. Prentice Hall, Upper Saddle River","edition":"2"},{"issue":"3","key":"416_CR28","doi-asserted-by":"crossref","first-page":"210","DOI":"10.1147\/rd.33.0210","volume":"3","author":"AL Samuel","year":"1959","unstructured":"Samuel AL (1959) Some studies in machine learning using the game of checkers. IBM J Res Dev 3(3):210\u2013229","journal-title":"IBM J Res Dev"},{"key":"416_CR29","first-page":"2164","volume-title":"Advances in neural information processing (NIPS)","author":"D Silver","year":"2010","unstructured":"Silver D, Veness J (2010) Monte-Carlo planning in large POMDPs. In: Advances in neural information processing (NIPS), pp 2164\u20132172"},{"key":"416_CR30","first-page":"974","volume-title":"Advances in neural information processing systems","author":"SP Singh","year":"1996","unstructured":"Singh SP, Bertsekas D (1996) Reinforcement learning for dynamic channel allocation in cellular telephone systems. In: Advances in neural information processing systems, vol NIPS, pp 974\u2013980"},{"issue":"8","key":"416_CR31","doi-asserted-by":"crossref","first-page":"1309","DOI":"10.1016\/j.jcss.2007.08.009","volume":"74","author":"AL Strehl","year":"2008","unstructured":"Strehl AL, Littman ML (2008) An analysis of model-based interval estimation for Markov decision processes. J Comput Syst Sci 74(8):1309\u20131331","journal-title":"J Comput Syst Sci"},{"key":"416_CR32","first-page":"943","volume-title":"Proceedings of the seventeenth international conference on machine learning (ICML 2000)","author":"MJA Strens","year":"2000","unstructured":"Strens MJA (2000) A Bayesian framework for reinforcement learning. In: Proceedings of the seventeenth international conference on machine learning (ICML 2000). Stanford University, Stanford, CA, USA, June 29\u2013July 2, 2000, pp 943\u2013950"},{"key":"416_CR33","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: an introduction. MIT Press, Cambridge"},{"key":"416_CR34","first-page":"1031","volume-title":"International conference on machine learning (ICML)","author":"I Szita","year":"2010","unstructured":"Szita I, Szepesv\u00e1ri C (2010) Model-based reinforcement learning with nearly tight exploration complexity bounds. In: International conference on machine learning (ICML), pp 1031\u20131038"},{"key":"416_CR35","first-page":"257","volume":"8","author":"G Tesauro","year":"1992","unstructured":"Tesauro G (1992) Practical issues in temporal difference learning. Mach Learn 8:257\u2013277","journal-title":"Mach Learn"},{"issue":"2","key":"416_CR36","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1162\/neco.1994.6.2.215","volume":"6","author":"G Tesauro","year":"1994","unstructured":"Tesauro G (1994) Td-gammon, a self-teaching backgammon program, achieves master-level play. Neural Comput 6(2):215\u2013219","journal-title":"Neural Comput"},{"issue":"3","key":"416_CR37","doi-asserted-by":"crossref","first-page":"58","DOI":"10.1145\/203330.203343","volume":"38","author":"G Tesauro","year":"1995","unstructured":"Tesauro G (1995) Temporal difference learning and td-gammon. Commun ACM 38(3):58\u201368","journal-title":"Commun ACM"},{"issue":"6","key":"416_CR38","doi-asserted-by":"crossref","first-page":"2008","DOI":"10.1587\/transcom.E92.B.2008","volume":"92-B","author":"NA Vien","year":"2009","unstructured":"Vien NA, Viet NH, Lee S, Chung T (2009) Policy gradient SMDP for resource allocation and routing in integrated services networks. IEICE Trans 92-B(6):2008\u20132022","journal-title":"IEICE Trans"},{"issue":"9","key":"416_CR39","doi-asserted-by":"crossref","first-page":"1671","DOI":"10.1016\/j.ins.2011.01.001","volume":"181","author":"NA Vien","year":"2011","unstructured":"Vien NA, Yu H, Chung T (2011) Hessian matrix distribution for Bayesian policy gradient reinforcement learning. Inf Sci 181(9):1671\u20131685","journal-title":"Inf Sci"},{"key":"416_CR40","first-page":"11","volume-title":"Proceedings of the twenty-fourth AAAI conference on artificial intelligence (AAAI 2010)","author":"TJ Walsh","year":"2010","unstructured":"Walsh TJ, Goschin S, Littman ML (2010) Integrating sample-based planning and model-based reinforcement learning. In: Proceedings of the twenty-fourth AAAI conference on artificial intelligence (AAAI 2010), Atlanta, GA, USA, July 11\u201315, 2010, pp\u00a011\u201315"},{"key":"416_CR41","doi-asserted-by":"crossref","first-page":"956","DOI":"10.1145\/1102351.1102472","volume-title":"International conference on machine learning (ICML)","author":"T Wang","year":"2005","unstructured":"Wang T, Lizotte DJ, Bowling MH, Schuurmans D (2005) Bayesian sparse sampling for on-line reward optimization. In: International conference on machine learning (ICML), pp 956\u2013963"},{"key":"416_CR42","first-page":"1114","volume-title":"International joint conferences on artificial intelligence","author":"W Zhang","year":"1995","unstructured":"Zhang W, Dietterich TG (1995) A reinforcement learning approach to job-shop scheduling. In: International joint conferences on artificial intelligence, pp 1114\u20131120"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-012-0416-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10489-012-0416-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-012-0416-2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,2,9]],"date-time":"2022-02-09T03:44:01Z","timestamp":1644378241000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10489-012-0416-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,2,22]]},"references-count":42,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2013,9]]}},"alternative-id":["416"],"URL":"https:\/\/doi.org\/10.1007\/s10489-012-0416-2","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013,2,22]]}}}