{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,26]],"date-time":"2026-04-26T09:06:58Z","timestamp":1777194418985,"version":"3.51.4"},"reference-count":83,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2018,8,15]],"date-time":"2018-08-15T00:00:00Z","timestamp":1534291200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"National Research Founda- tion for the Doctoral Program of China","award":["20133402110026"],"award-info":[{"award-number":["20133402110026"]}]},{"name":"National Hi-Tech Project of China","award":["2008AA01Z150"],"award-info":[{"award-number":["2008AA01Z150"]}]},{"name":"National Natural Science Foundation of China (CN)","award":["60745002"],"award-info":[{"award-number":["60745002"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61175057"],"award-info":[{"award-number":["61175057"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1007\/s10489-018-1248-5","type":"journal-article","created":{"date-parts":[[2018,8,15]],"date-time":"2018-08-15T05:42:11Z","timestamp":1534311731000},"page":"4998-5018","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Posterior sampling for Monte Carlo planning under uncertainty"],"prefix":"10.1007","volume":"48","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5164-4629","authenticated-orcid":false,"given":"Aijun","family":"Bai","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Feng","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoping","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,8,15]]},"reference":[{"key":"1248_CR1","unstructured":"Agrawal S, Goyal N (2012) Analysis of thompson sampling for the multi-armed bandit problem. In: Conference on learning theory, pp 39.1\u201339.26"},{"key":"1248_CR2","unstructured":"Agrawal S, Goyal N (2013) Further optimal regret bounds for Thompson sampling. In: Artificial intelligence and statistics, pp 99\u2013107"},{"key":"1248_CR3","unstructured":"Anand A, Mausam GA, Singla P (2015) ASAP-UCT: Abstraction of state-action pairs in UCT. In: Yang Q, Wooldridge M (eds) IJCAI. AAAI Press, pp 1509\u20131515"},{"key":"1248_CR4","doi-asserted-by":"crossref","unstructured":"Anand A, Mausam RN, Singla P (2016) OGA-UCT: On-the-go abstractions in UCT. In: Coles AJ, Coles A, Edelkamp S, Magazzeni D, Sanner S (eds) ICAPS. AAAI Press, pp 29\u2013 37","DOI":"10.1609\/icaps.v26i1.13745"},{"key":"1248_CR5","unstructured":"Asmuth J, Littman ML (2011) Learning is planning: near Bayes-optimal reinforcement learning via Monte-Carlo tree search. In: Uncertainty in artificial intelligence, pp 19\u201326"},{"key":"1248_CR6","first-page":"397","volume":"3","author":"P Auer","year":"2003","unstructured":"Auer P (2003) Using confidence bounds for exploitation-exploration trade-offs. J Mach Learn Res 3:397\u2013422","journal-title":"J Mach Learn Res"},{"issue":"2","key":"1248_CR7","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P Auer","year":"2002","unstructured":"Auer P, Cesa-Bianchi N, Fischer P (2002) Finite-time analysis of the multiarmed bandit problem. Mach Learn 47(2):235\u2013 256","journal-title":"Mach Learn"},{"key":"1248_CR8","unstructured":"Bai A, Srivastava S, Russell S (2016) Markovian state and action abstractions for MDPs via hierarchical MCTS. In: 25th international joint conference on artificial intelligence (IJCAI). New York"},{"key":"1248_CR9","unstructured":"Bai A, Wu F, Chen X (2012) Online planning for large MDPs with MAXQ decomposition (extended abstract). In: van der Hoek W, Padgham L, Conitzer V, Winikoff M (eds) International conference on autonomous agents and multiagent systems, AAMAS 2012, Valencia, Spain, June 4-8, 2012 (3 volumes). IFAAMAS, pp 1215\u20131216"},{"key":"1248_CR10","unstructured":"Bai A, Wu F, Chen X (2013) Bayesian Mixture modelling and inference based Thompson sampling in Monte-Carlo tree search. In: Advances in neural information processing systems 26, pp 1646\u20131654"},{"issue":"4","key":"1248_CR11","first-page":"45:1","volume":"6","author":"A Bai","year":"2015","unstructured":"Bai A, Wu F, Chen X (2015) Online planning for large Markov decision processes with hierarchical decomposition. ACM Trans Intell Syst Technol (TIST) 6(4):45:1\u201345:28","journal-title":"ACM Trans Intell Syst Technol (TIST)"},{"key":"1248_CR12","doi-asserted-by":"crossref","unstructured":"Bai A, Wu F, Zhang Z, Chen X (2014) Thompson sampling based Monte-Carlo planning in POMDPs. In: International conference on automated planning and scheduling (ICAPS)","DOI":"10.1609\/icaps.v24i1.13616"},{"key":"1248_CR13","doi-asserted-by":"crossref","unstructured":"Barrett S, Agmon N, Hazon N, Kraus S, teammates P. Stone. (2014) Communicating with unknown. In: Proceedings of 13th international conference on autonomous agents and multiagent systems (AAMAS 2012)","DOI":"10.3233\/978-1-61499-419-0-45"},{"key":"1248_CR14","doi-asserted-by":"crossref","unstructured":"Barrett S, Stone P, Kraus S, Rosenfeld A (2013) Teamwork with limited knowledge of teammates. In: Proceedings of the twenty-seventh AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v27i1.8659"},{"issue":"1-2","key":"1248_CR15","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1016\/0004-3702(94)00011-O","volume":"72","author":"A Barto","year":"1995","unstructured":"Barto A, Bradtke S, Singh S (1995) Learning to act using real-time dynamic programming. Artif Intell 72(1-2):81\u2013138","journal-title":"Artif Intell"},{"key":"1248_CR16","volume-title":"Dynamic programming","author":"R Bellman","year":"1957","unstructured":"Bellman R (1957) Dynamic programming, 1st edn. Princeton University Press, Princeton","edition":"1st edn."},{"issue":"1","key":"1248_CR17","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1023\/A:1009634810396","volume":"5","author":"DP Bertsekas","year":"1999","unstructured":"Bertsekas DP, Castanon DA (1999) Rollout algorithms for stochastic scheduling problems. J Heuristics 5 (1):89\u2013108","journal-title":"J Heuristics"},{"key":"1248_CR18","unstructured":"Bonet B, Geffner H (2003) Labeled rtdp: Improving the convergence of real-time dynamic programming. In: International conference on automated planning and scheduling, vol 3"},{"key":"1248_CR19","unstructured":"Bonet B, Geffner H (2012) Action selection for MDPs Anytime AO* vs. UCT. In: AAAI conference on artificial intelligence, pp 1749\u20131755"},{"issue":"1","key":"1248_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TCIAIG.2012.2186810","volume":"4","author":"C Browne","year":"2012","unstructured":"Browne C, Powley EJ, Whitehouse D, Lucas SM, Cowling PI, Rohlfshagen P, Tavener S, Perez D, Samothrakis S, Colton S (2012) A survey of Monte Carlo, tree search methods. IEEE Trans Comput Intell AI Games 4(1):1\u201343","journal-title":"IEEE Trans Comput Intell AI Games"},{"issue":"1","key":"1248_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1561\/2200000024","volume":"5","author":"S Bubeck","year":"2012","unstructured":"Bubeck S, Cesa-Bianchi N (2012) Regret analysis of stochastic and nonstochastic multi-armed bandit problems. Found Trends Mach Learn 5(1):1\u2013122","journal-title":"Found Trends Mach Learn"},{"issue":"19","key":"1248_CR22","doi-asserted-by":"publisher","first-page":"1832","DOI":"10.1016\/j.tcs.2010.12.059","volume":"412","author":"S Bubeck","year":"2011","unstructured":"Bubeck S, Munos R, Stoltz G (2011) Pure exploration in finitely-armed and continuous-armed bandits. Theor Comput Sci 412(19):1832\u20131852","journal-title":"Theor Comput Sci"},{"issue":"3","key":"1248_CR23","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1023\/B:DISC.0000028199.78776.c4","volume":"14","author":"HS Chang","year":"2004","unstructured":"Chang HS, Givan R, Chong EK (2004) Parallel rollout for online solution of partially observable Markov decision processes. Discret Event Dyn Syst 14(3):309\u2013341","journal-title":"Discret Event Dyn Syst"},{"key":"1248_CR24","unstructured":"Chapelle O, Li L (2011) An empirical evaluation of Thompson sampling. In: Advances neural information processing systems, pp 2249\u20132257"},{"key":"1248_CR25","unstructured":"Chaslot G, Bakkes S, Szita I, Spronck P (2008) Monte-Carlo tree search: a new framework for game AI. In: Darken C, Mateas M (eds) Proceedings of the fourth artificial intelligence and interactive digital entertainment conference. The AAAI Press, Stanford"},{"key":"1248_CR26","volume-title":"Asymptotic theory of statistics and probability","author":"A DasGupta","year":"2008","unstructured":"DasGupta A (2008) Asymptotic theory of statistics and probability. Springer, Berlin"},{"key":"1248_CR27","unstructured":"Dearden R, Friedman N, Russell S (1998) Bayesian Q-learning. In: AAAI conference on artificial intelligence, pp 761\u2013768"},{"key":"1248_CR28","volume-title":"Probability and statistics","author":"MH DeGroot","year":"2002","unstructured":"DeGroot MH, Schervish MJ (2002) Probability and statistics. Addison Wesley, Boston"},{"issue":"1","key":"1248_CR29","first-page":"63","volume":"13","author":"TG Dietterich","year":"1999","unstructured":"Dietterich TG (1999) Hierarchical reinforcement learning with the MAXQ value function decomposition. J Mach Learn Res 13(1):63","journal-title":"J Mach Learn Res"},{"key":"1248_CR30","doi-asserted-by":"crossref","unstructured":"Eyerich P, Keller T, Helmert M (2010) High-quality policies for the Canadian traveler\u2019s problem. In: AAAI conference on artificial intelligence, pp 51\u201358","DOI":"10.1609\/aaai.v24i1.7542"},{"key":"1248_CR31","unstructured":"Feldman Z, Domshlak C (2012) Simple regret optimization in online planning for Markov decision processes. In: AAAI conference on artificial intelligence"},{"key":"1248_CR32","doi-asserted-by":"crossref","unstructured":"Feldman Z, Domshlak C (2014) On MABs and separation of concerns in Monte-Carlo planning for MDPs. In: Chien SA, Do MB, Fern A, Ruml W (eds) ICAPS. AAAI","DOI":"10.1609\/icaps.v24i1.13631"},{"key":"1248_CR33","unstructured":"Feng Z, Hansen E (2002) Symbolic heuristic search for factored Markov decision processes. In: AAAI\/IAAI, pp 455\u2013460"},{"key":"1248_CR34","first-page":"259","volume":"8","author":"H Finnsson","year":"2008","unstructured":"Finnsson H, Bj\u00f6rnsson Y (2008) Simulation-based approach to general game playing. AAAI 8:259\u2013264","journal-title":"AAAI"},{"key":"1248_CR35","doi-asserted-by":"crossref","unstructured":"Forbes C, Evans M (2011). In: Hastings N, Peacock B (eds) Statistical distributions. Wiley, Nwe York","DOI":"10.1002\/9780470627242"},{"key":"1248_CR36","doi-asserted-by":"crossref","unstructured":"Gelly S, Silver D (2007) Combining online and offline knowledge in UCT. In: Proceedings of the 24th international conference on machine learning. ACM, pp 273\u2013280","DOI":"10.1145\/1273496.1273531"},{"issue":"11","key":"1248_CR37","doi-asserted-by":"publisher","first-page":"1856","DOI":"10.1016\/j.artint.2011.03.007","volume":"175","author":"S Gelly","year":"2011","unstructured":"Gelly S, Silver D (2011) Monte-Carlo Tree search and rapid action value estimation in computer Go. Artif Intell 175(11):1856\u20131875","journal-title":"Artif Intell"},{"key":"1248_CR38","unstructured":"Gopalan A, Mannor S, Mansour Y (2014) Thompson sampling for complex online problems. In: Proceedings of the 31st international conference on machine learning, pp 100\u2013108"},{"key":"1248_CR39","doi-asserted-by":"crossref","unstructured":"Gordon NJ, Salmond DJ, Smith AF (1993) Novel approach to nonlinear\/non-Gaussian bayesian state estimation. In: IEE Proceedings F (radar and signal processing), vol 140. IET, pp 107\u2013113","DOI":"10.1049\/ip-f-2.1993.0015"},{"key":"1248_CR40","unstructured":"Grzes M, Poupart P (2014) Pomdp planning and execution in an augmented space. In: Proceedings of the 2014 international conference on autonomous agents and multi-agent systems. International Foundation for Autonomous Agents and Multiagent Systems, pp 757\u2013764"},{"key":"1248_CR41","unstructured":"Grze\u015b M, Poupart P, Hoey J (2013) Isomorph-free branch and bound search for finite state controllers. In: Proceedings of the twenty-third international joint conference on artificial intelligence. AAAI Press, pp 2282\u20132290"},{"key":"1248_CR42","unstructured":"Guez A, Silver D, Dayan P (2012) Efficient Bayes-adaptive reinforcement learning using sample-based search. In: Advances in neural information processing systems, pp 1034\u20131042"},{"issue":"1-2","key":"1248_CR43","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1016\/S0004-3702(01)00106-0","volume":"129","author":"E Hansen","year":"2001","unstructured":"Hansen E, Zilberstein S (2001) LAO* A heuristic search algorithm that finds solutions with loops. Artif Intell 129(1-2):35\u201362","journal-title":"Artif Intell"},{"issue":"3","key":"1248_CR44","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1109\/TSSC.1968.300117","volume":"4","author":"ET Jaynes","year":"1968","unstructured":"Jaynes ET (1968) Prior probabilities. IEEE Trans Syst Sci Cybern 4(3):227\u2013241","journal-title":"IEEE Trans Syst Sci Cybern"},{"key":"1248_CR45","doi-asserted-by":"publisher","first-page":"299","DOI":"10.1214\/154957804100000051","volume":"1","author":"GL Jones","year":"2004","unstructured":"Jones GL (2004) On the Markov chain central limit theorem. Probab Surv 1:299\u2013320","journal-title":"Probab Surv"},{"issue":"1-2","key":"1248_CR46","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1016\/S0004-3702(98)00023-X","volume":"101","author":"LP Kaelbling","year":"1998","unstructured":"Kaelbling LP, Littman ML, Cassandra AR (1998) Planning and acting in partially observable stochastic domains. Artif Intell 101(1-2):99\u2013134","journal-title":"Artif Intell"},{"key":"1248_CR47","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"LP Kaelbling","year":"1996","unstructured":"Kaelbling LP, Littman ML, Moore AW (1996) Reinforcement learning: a survey. J Artif Intell Res 4:237\u2013285","journal-title":"J Artif Intell Res"},{"key":"1248_CR48","doi-asserted-by":"crossref","unstructured":"Kaufmann E, Korda N, Munos R (2012) Thompson sampling An optimal finite time analysis. In: Algorithmic Learning Theory, pp 199\u2013213","DOI":"10.1007\/978-3-642-34106-9_18"},{"key":"1248_CR49","unstructured":"Kearns M, Mansour Y, Ng A (1999) A sparse sampling algorithm for near-optimal planning in large Markov decision processes. In: Proceedings of the 16th international joint conference on artificial intelligence, vol 2. Morgan Kaufmann Publishers Inc, pp 1324\u20131331"},{"key":"1248_CR50","doi-asserted-by":"crossref","unstructured":"Keller T, Eyerich P (2012) Prost: Probabilistic planning based on UCT. In: ICAPS12","DOI":"10.1609\/icaps.v22i1.13518"},{"key":"1248_CR51","doi-asserted-by":"crossref","unstructured":"Keller T, Helmert M (2013) Trial-based heuristic tree search for finite horizon MDPs. In: Proceedings of the 23rd international conference on automated planning and scheduling (ICAPS), pp 135\u2013143","DOI":"10.1609\/icaps.v23i1.13557"},{"key":"1248_CR52","first-page":"282","volume-title":"Lecture Notes in Computer Science","author":"Levente Kocsis","year":"2006","unstructured":"Kocsis L, Szepesv\u00e1ri C (2006) Bandit based Monte-Carlo planning. In: European conference on machine learning, pp 282\u2013293"},{"key":"1248_CR53","unstructured":"Korda N, Kaufmann E, Munos R (2013) Thompson sampling for 1-dimensional exponential family bandits. In: Burges C, Bottou L, Welling M, Ghahramani Z, Weinberger K (eds) Advances in neural information processing systems 26. Curran Associates, Inc, pp 1448\u20131456"},{"key":"1248_CR54","unstructured":"Kurniawati H, Hsu D, Lee WS (2008) SARSOP efficient point-based POMDP planning by approximating optimally reachable belief spaces. In: Robotics: science and systems, pp 65\u201372"},{"key":"1248_CR55","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/0196-8858(85)90002-8","volume":"6","author":"T Lai","year":"1985","unstructured":"Lai T, Robbins H (1985) Asymptotically efficient adaptive allocation rules. Adv Appl Math 6:4\u201322","journal-title":"Adv Appl Math"},{"key":"1248_CR56","unstructured":"Macindoe O, Kaelbling LP, Lozano-P\u00e9rez T (2012) POMCoP: Belief space planning for sidekicks in cooperative games. In: Riedl M, Sukthankar G (eds) Proceedings of the eighth AAAI conference on artificial intelligence and interactive digital entertainment, AIIDE-12. The AAAI Press, Stanford"},{"key":"1248_CR57","unstructured":"McAllester DA, Singh S (1999) Approximate planning for factored pomdps using belief state simplification. In: Proceedings of the fifteenth conference on uncertainty in artificial intelligence. Morgan Kaufmann Publishers Inc, pp 409\u2013416"},{"key":"1248_CR58","doi-asserted-by":"crossref","unstructured":"McMahan HB, Likhachev M, Gordon G (2005) Bounded real-time dynamic programming: Rtdp with monotone upper bounds and performance guarantees. In: Proceedings of the 22nd international conference on machine learning. ACM, pp 569\u2013576","DOI":"10.1145\/1102351.1102423"},{"key":"1248_CR59","unstructured":"Osband I, Russo D, Van Roy B (2013) (more) efficient reinforcement learning via posterior sampling. In: Advances in neural information processing systems, pp 3003\u20133011"},{"issue":"1","key":"1248_CR60","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1016\/0304-3975(91)90263-2","volume":"84","author":"CH Papadimitriou","year":"1991","unstructured":"Papadimitriou CH, Yannakakis M (1991) Shortest paths without a map. Theor Comput Sci 84(1):127\u2013150","journal-title":"Theor Comput Sci"},{"key":"1248_CR61","unstructured":"Paquet S, Chaib-draa B, Ross S (2006) Hybrid POMDP Algorithms. In: Proceedings of the workshop on multi-agent sequential decision making in uncertain domains (MSDM-06). Citeseer, pp 133\u2013147"},{"key":"1248_CR62","doi-asserted-by":"crossref","unstructured":"Paquet S, Tobin L, Chaib-draa B (2005) Real-time decision making for large POMDPs. In Advances in artificial intelligence. Springer, pp 450\u2013455","DOI":"10.1007\/11424918_49"},{"key":"1248_CR63","unstructured":"Pineau J, Gordon G, Thrun S, et al. (2003) Point-based value iteration: an anytime algorithm for POMDPs. In: IJCAI, vol 3, pp 1025\u20131032"},{"key":"1248_CR64","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov decision processes: discrete stochastic dynamic programming","author":"ML Puterman","year":"1994","unstructured":"Puterman ML (1994) Markov decision processes: discrete stochastic dynamic programming. Wiley, New York"},{"key":"1248_CR65","unstructured":"Ross S, Chaib-Draa B, et al. (2007) Aems: an anytime online search algorithm for approximate policy refinement in large POMDPs. In: IJCAI, pp 2592\u20132598"},{"issue":"1","key":"1248_CR66","doi-asserted-by":"publisher","first-page":"663","DOI":"10.1613\/jair.2567","volume":"32","author":"S Ross","year":"2008","unstructured":"Ross S, Pineau J, Paquet S, Chaib-Draa B (2008) Online planning algorithms for POMDPs. J Artif Intell Res 32(1):663\u2013704","journal-title":"J Artif Intell Res"},{"key":"1248_CR67","unstructured":"Sanner S, Goetschalckx R, Driessens K, Shani G (2009) Bayesian real-time dynamic programming. In: IJCAI, pp 1784\u20131789"},{"issue":"7587","key":"1248_CR68","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, Maddison CJ, Guez A, Sifre L, Van Den Driessche G, Schrittwieser J, Antonoglou I, Panneershelvam V, Lanctot M et al (2016) Mastering the game of Go with deep neural networks and tree search. Nature 529(7587):484\u2013489","journal-title":"Nature"},{"issue":"7676","key":"1248_CR69","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver D, Schrittwieser J, Simonyan K, Antonoglou I, Huang A, Guez A, Hubert T, Baker L, Lai M, Bolton A et al (2017) Mastering the game of go without human knowledge. Nature 550(7676):354","journal-title":"Nature"},{"key":"1248_CR70","unstructured":"Silver D, Veness J (2010) Monte-Carlo planning in large POMDPs. In: Advances in neural information processing systems, pp 2164\u20132172"},{"key":"1248_CR71","unstructured":"Smith T, Simmons R (2004) Heuristic search value iteration for POMDPs. In: Proceedings of the 20th conference on uncertainty in artificial intelligence. AUAI Press, pp 520\u2013527"},{"key":"1248_CR72","unstructured":"Somani A, Ye N, Hsu D, Lee WS (2013) DESPOT: Online POMDP planning with regularization. In: Burges C, Bottou L, Welling M, Ghahramani Z, Weinberger K (eds) Advances in neural information processing systems 26. Curran Associates, Inc, pp 1772\u20131780"},{"key":"1248_CR73","volume-title":"Reinforcement learning: An introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: An introduction. The MIT Press, Cambridge"},{"key":"1248_CR74","unstructured":"Tesauro G, Rajan VT, Segal R (2010) Bayesian inference in Monte-Carlo tree search. In: Uncertainty in artificial intelligence, pp 580\u2013588"},{"key":"1248_CR75","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1093\/biomet\/25.3-4.285","volume":"25","author":"WR Thompson","year":"1933","unstructured":"Thompson WR (1933) On the likelihood that one unknown probability exceeds another in view of the evidence of two samples. Biometrika 25:285\u2013294","journal-title":"Biometrika"},{"key":"1248_CR76","unstructured":"Thrun S (1999) Monte Carlo POMDPs. In: NIPS, vol 12, pp 1064\u20131070"},{"key":"1248_CR77","unstructured":"Tolpin D, Shimony SE (2012) MCTS based on simple regret. In: AAAI conference on artificial intelligence"},{"issue":"2","key":"1248_CR78","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1007\/s10489-012-0416-2","volume":"39","author":"NA Vien","year":"2013","unstructured":"Vien NA, Ertel W, Dang V -H, Chung T (2013) Monte-Carlo tree search for bayesian reinforcement learning. Appl Intell 39(2):345\u2013353","journal-title":"Appl Intell"},{"key":"1248_CR79","doi-asserted-by":"crossref","unstructured":"Wang T, Lizotte D, Bowling M, Schuurmans D (2005) Bayesian sparse sampling for on-line reward optimization. In: Proceedings of the 22nd international conference on machine learning. ACM, pp 956\u2013963","DOI":"10.1145\/1102351.1102472"},{"key":"1248_CR80","doi-asserted-by":"crossref","unstructured":"Washington R (1997) BI-POMDP: bounded, incremental partially-observable Markov-model planning. In: Recent advances in AI planning. Springer, pp 440\u2013451","DOI":"10.1007\/3-540-63912-8_105"},{"issue":"4","key":"1248_CR81","doi-asserted-by":"publisher","first-page":"239","DOI":"10.1109\/TCIAIG.2010.2061050","volume":"2","author":"MH Winands","year":"2010","unstructured":"Winands MH, Bjornsson Y, Saito J (2010) Monte Carlo tree search in lines of action. IEEE Trans Comput Intell AI Games 2(4):239\u2013250","journal-title":"IEEE Trans Comput Intell AI Games"},{"key":"1248_CR82","unstructured":"Wu F, Zilberstein S, Chen X (2011) Online planning for ad hoc autonomous agent teams. In: International joint conference on artificial intelligence, pp 439\u2013445"},{"key":"1248_CR83","unstructured":"Zhang Z, Chen X (2012) FHHOP a factored hybrid heuristic online planning algorithm for large POMDPs. In: Proceedings of the 28th conference on uncertainty in artificial intelligence. Catalina Island, pp 934\u2013943"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10489-018-1248-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-018-1248-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-018-1248-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,6]],"date-time":"2025-07-06T10:20:56Z","timestamp":1751797256000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10489-018-1248-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,8,15]]},"references-count":83,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2018,12]]}},"alternative-id":["1248"],"URL":"https:\/\/doi.org\/10.1007\/s10489-018-1248-5","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,8,15]]},"assertion":[{"value":"15 August 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}