{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,1,10]],"date-time":"2024-01-10T01:09:47Z","timestamp":1704848987341},"reference-count":65,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2016,1,29]],"date-time":"2016-01-29T00:00:00Z","timestamp":1454025600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2016,7]]},"DOI":"10.1007\/s10489-015-0742-2","type":"journal-article","created":{"date-parts":[[2016,1,29]],"date-time":"2016-01-29T05:03:01Z","timestamp":1454043781000},"page":"112-126","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Bayes-adaptive hierarchical MDPs"],"prefix":"10.1007","volume":"45","author":[{"given":"Ngo Anh","family":"Vien","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"SeungGwan","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"TaeChoong","family":"Chung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,1,29]]},"reference":[{"key":"742_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel P, Coates A, Quigley M, Ng AY (2006) An application of reinforcement learning to aerobatic helicopter flight","DOI":"10.7551\/mitpress\/7503.003.0006"},{"key":"742_CR2","unstructured":"Asmuth J, Littman ML (2011) Learning is planning: near Bayes-optimal reinforcement learning via Monte-Carlo tree search. In: UAI, p 19\u201326"},{"key":"742_CR3","unstructured":"Atkeson CG (1997) Nonparametric model-based reinforcement learning. 
In: Advances in Neural Information Processing Systems (NIPS)"},{"issue":"2-3","key":"742_CR4","doi-asserted-by":"crossref","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P Auer","year":"2002","unstructured":"Auer P, Cesa-Bianchi N, Fischer P (2002) Finite-time analysis of the multiarmed bandit problem. Mach Learn 47(2-3):235\u2013256","journal-title":"Mach Learn"},{"key":"742_CR5","doi-asserted-by":"crossref","unstructured":"Bai H, Hsu D, Lee WS, Vien NA (2010) Monte Carlo value iteration for continuous-state POMDPs. In: Algorithmic foundations of robotics IX, p 175\u2013191","DOI":"10.1007\/978-3-642-17452-0_11"},{"issue":"4","key":"742_CR6","doi-asserted-by":"crossref","first-page":"341","DOI":"10.1023\/A:1025696116075","volume":"13","author":"AG Barto","year":"2003","unstructured":"Barto AG, Mahadevan S (2003) Recent advances in hierarchical reinforcement learning. Discrete Event Dynamic Systems 13(4):341\u2013379","journal-title":"Discrete Event Dynamic Systems"},{"issue":"3","key":"742_CR7","doi-asserted-by":"crossref","first-page":"243","DOI":"10.1023\/A:1007634325138","volume":"40","author":"J Baxter","year":"2000","unstructured":"Baxter J, Tridgell A, Weaver L (2000) Learning to play chess using temporal differences. Mach Learn 40(3):243\u2013263","journal-title":"Mach Learn"},{"key":"742_CR8","unstructured":"Cao F, Ray S, Bottou L (2012) Bayesian hierarchical reinforcement learning. In: Bartlett P, Pereira F, Burges C, Weinberger K (eds) Advances in Neural Information Processing Systems (NIPS), pp 73\u201381"},{"key":"742_CR9","unstructured":"Castro PS, Precup D (2007) Using Linear Programming for Bayesian exploration in Markov decision processes. 
In: IJCAI, p 2437\u20132442"},{"issue":"3","key":"742_CR10","first-page":"15:1","volume":"4","author":"H Cuay\u00e1huitl","year":"2014","unstructured":"Cuay\u00e1huitl H, Kruijff-Korbayov\u00e1 I, Dethlefs N (2014) Nonstrict hierarchical reinforcement learning for interactive systems and robots. TiiS 4(3):15:1\u201315:30","journal-title":"TiiS"},{"key":"742_CR11","unstructured":"Dearden R, Friedman N, Russell SJ (1998) Bayesian Q-learning. In: AAAI, p 761\u2013768"},{"key":"742_CR12","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1613\/jair.639","volume":"13","author":"TG Dietterich","year":"2000","unstructured":"Dietterich TG (2000) Hierarchical reinforcement learning with the MAXQ value function decomposition. J Artif Intell Res (JAIR) 13:227\u2013303","journal-title":"J Artif Intell Res (JAIR)"},{"key":"742_CR13","unstructured":"Duff M (2002) Optimal learning: Computational procedures for Bayes-adaptive Markov decision processes. PhD thesis, University of Massachusetts Amherst"},{"key":"742_CR14","unstructured":"Engel Y, Mannor S, Meir R (2003) Bayes meets Bellman: The Gaussian process approach to temporal difference learning. In: Proceedings of the International Conference on Machine Learning, p 154\u2013161"},{"key":"742_CR15","doi-asserted-by":"crossref","unstructured":"Engel Y, Mannor S, Meir R (2005) Reinforcement learning with Gaussian processes. In: Proceedings of the International Conference on Machine Learning, p 201\u2013208","DOI":"10.1145\/1102351.1102377"},{"key":"742_CR16","unstructured":"Ghavamzadeh M, Engel Y (2006) Bayesian policy gradient algorithms. In: Advances in Neural Information Processing Systems (NIPS), p 457\u2013464"},{"key":"742_CR17","doi-asserted-by":"crossref","unstructured":"Ghavamzadeh M, Engel Y (2007) Bayesian actor-critic algorithms. 
In: Proceedings of the International Conference on Machine Learning, p 297\u2013304","DOI":"10.1145\/1273496.1273534"},{"key":"742_CR18","unstructured":"Guez A, Silver D, Dayan P (2012) Efficient Bayes-adaptive reinforcement learning using sample-based search. In: Advances in Neural Information Processing Systems (NIPS), p 1034\u20131042"},{"key":"742_CR19","unstructured":"Hauskrecht M, Meuleau N, Kaelbling LP, Dean T, Boutilier C (1998) Hierarchical solution of Markov decision processes using macro-actions. In: UAI, p 220\u2013229"},{"key":"742_CR20","doi-asserted-by":"crossref","unstructured":"He R, Brunskill E, Roy N (2010) PUMA: Planning under uncertainty with macro-actions. In: Proceedings of the Association for the Advancement of Artificial Intelligence (AAAI)","DOI":"10.1613\/jair.3171"},{"issue":"1","key":"742_CR21","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1023\/B:APIN.0000011143.95085.74","volume":"20","author":"J Hong","year":"2004","unstructured":"Hong J, Prabhu VV (2004) Distributed reinforcement learning control for batch sequencing and sizing in just-in-time manufacturing systems. Appl Intell 20(1):71\u201387","journal-title":"Appl Intell"},{"issue":"1","key":"742_CR22","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1007\/s10489-008-0115-1","volume":"31","author":"A Iglesias","year":"2009","unstructured":"Iglesias A, Mart\u00ednez P, Aler R, Fern\u00e1ndez F (2009) Learning teaching strategies in an adaptive and intelligent educational system through reinforcement learning. Appl Intell 31(1):89\u2013106","journal-title":"Appl Intell"},{"key":"742_CR23","unstructured":"Konidaris G, Barto AG (2009) Skill discovery in continuous reinforcement learning domains using skill chaining. In: Advances in Neural Information Processing Systems 22: 23rd Annual Conference on Neural Information Processing Systems 2009. 
Proceedings of a meeting held 7-10 December 2009, Vancouver, British Columbia, Canada, p 1015\u20131023"},{"key":"742_CR24","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1613\/jair.3093","volume":"39","author":"T Lang","year":"2010","unstructured":"Lang T, Toussaint M (2010) Planning with noisy probabilistic relational rules. J Artif Intell Res (JAIR) 39:1\u201349","journal-title":"J Artif Intell Res (JAIR)"},{"key":"742_CR25","unstructured":"Lim ZW, Hsu D, Sun LW (2011) Monte Carlo value iteration with macro-actions. In: Advances in Neural Information Processing Systems (NIPS), p 1287\u20131295"},{"key":"742_CR26","unstructured":"McGovern A, Barto AG (2001) Automatic discovery of subgoals in reinforcement learning using diverse density. In: Proceedings of the International Conference on Machine Learning, p 361\u2013368"},{"key":"742_CR27","unstructured":"McGovern A, Sutton RS, Fagg AH (1997) Roles of macro-actions in accelerating reinforcement learning. In: Grace Hopper Celebration of Women in Computing, p 13\u201318"},{"key":"742_CR28","unstructured":"Pineau J, Thrun S (2001) An integrated approach to hierarchy and abstraction for POMDPs. Tech. rep., Carnegie Mellon University. Robotics Institute"},{"key":"742_CR29","doi-asserted-by":"crossref","unstructured":"Poupart P, Vlassis NA, Hoey J, Regan K (2006) An analytic solution to discrete Bayesian reinforcement learning. In: Proceedings of the International Conference on Machine Learning, p 697\u2013704","DOI":"10.1145\/1143844.1143932"},{"key":"742_CR30","unstructured":"Ross S, Chaib-draa B, Pineau J (2007) Bayes-adaptive POMDPs. In: Advances in Neural Information Processing Systems (NIPS)"},{"key":"742_CR31","unstructured":"Ross S, Pineau J (2008) Model-based bayesian reinforcement learning in large structured domains. In: UAI, pp. 
476\u2013483"},{"issue":"3","key":"742_CR32","doi-asserted-by":"crossref","first-page":"210","DOI":"10.1147\/rd.33.0210","volume":"3","author":"AL Samuel","year":"1959","unstructured":"Samuel AL (1959) Some studies in machine learning using the game of checkers. IBM J Res Dev 3(3):210\u2013229","journal-title":"IBM J Res Dev"},{"key":"742_CR33","unstructured":"Silver D, Veness J (2010) Monte-carlo planning in large POMDPs. In: Advances in Neural Information Processing Systems (NIPS), pp. 2164\u20132172"},{"key":"742_CR34","doi-asserted-by":"crossref","unstructured":"Simsek \u00d6., Barto AG (2004) Using relative novelty to identify useful temporal abstractions in reinforcement learning. In: Proceedings of the International Conference on Machine Learning","DOI":"10.1145\/1015330.1015353"},{"key":"742_CR35","unstructured":"Simsek \u00d6., Barto AG (2008) Skill characterization based on betweenness. In: Advances in Neural Information Processing Systems (NIPS), p 1497\u20131504"},{"key":"742_CR36","unstructured":"Singh SP, Bertsekas D (1996) Reinforcement learning for dynamic channel allocation in cellular telephone systems. In: Advances in Neural Information Processing Systems (NIPS), p 974\u2013980"},{"key":"742_CR37","unstructured":"Strens MJA (2000) A Bayesian framework for reinforcement learning. In: Proceedings of the International Conference on Machine Learning, p 943\u2013950"},{"key":"742_CR38","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement Learning: An Introduction. MIT Press, Cambridge"},{"issue":"1-2","key":"742_CR39","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton RS, Precup D, Singh SP (1999) Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning. 
Artif Intell 112(1-2):181\u2013211","journal-title":"Artif Intell"},{"key":"742_CR40","first-page":"257","volume":"8","author":"G Tesauro","year":"1992","unstructured":"Tesauro G (1992) Practical issues in temporal difference learning. Mach Learn 8:257\u2013277","journal-title":"Mach Learn"},{"issue":"2","key":"742_CR41","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1162\/neco.1994.6.2.215","volume":"6","author":"G Tesauro","year":"1994","unstructured":"Tesauro G (1994) TD-Gammon, a self-teaching backgammon program, achieves master-level play. Neural Comput 6(2):215\u2013219","journal-title":"Neural Comput"},{"issue":"3","key":"742_CR42","doi-asserted-by":"crossref","first-page":"58","DOI":"10.1145\/203330.203343","volume":"38","author":"G Tesauro","year":"1995","unstructured":"Tesauro G (1995) Temporal difference learning and TD-Gammon. Commun ACM 38(3):58\u201368","journal-title":"Commun ACM"},{"key":"742_CR43","unstructured":"Theocharous G, Kaelbling LP (2003) Approximate planning in POMDPs with macro-actions. In: Advances in Neural Information Processing Systems (NIPS)"},{"key":"742_CR44","doi-asserted-by":"crossref","unstructured":"Vien NA, Chung T (2007) Natural gradient policy for average cost SMDP problem. In: Proceedings of the IEEE International Conference on Tools with Artificial Intelligence, p 11\u201318","DOI":"10.1109\/ICTAI.2007.12"},{"key":"742_CR45","doi-asserted-by":"crossref","unstructured":"Vien NA, Chung T (2008) Policy gradient semi-Markov decision process. In: Proceedings of the IEEE International Conference on Tools with Artificial Intelligence, p 11\u201318","DOI":"10.1109\/ICTAI.2008.51"},{"key":"742_CR46","doi-asserted-by":"crossref","unstructured":"Vien NA, Ertel W (2012) Learning via human feedback in continuous state and action spaces. 
In: AAAI Fall Symposium: Robots Learning Interactively from Human Teachers","DOI":"10.1007\/s10489-012-0412-6"},{"key":"742_CR47","doi-asserted-by":"crossref","unstructured":"Vien NA, Ertel W (2012) Monte carlo tree search for bayesian reinforcement learning. In: 11th International Conference on Machine Learning and Applications, ICMLA, Boca Raton, FL, USA, December 12-15, 2012. Volume 1, p 138\u2013143","DOI":"10.1109\/ICMLA.2012.30"},{"issue":"2","key":"742_CR48","doi-asserted-by":"crossref","first-page":"271","DOI":"10.1587\/transinf.E93.D.271","volume":"93-D","author":"NA Vien","year":"2010","unstructured":"Vien NA, Lee S, Chung T (2010) Policy gradient based semi-Markov decision problems: Approximation and estimation errors. IEICE Trans 93-D(2):271\u2013279","journal-title":"IEICE Trans"},{"key":"742_CR49","unstructured":"Vien NA, Ngo H, Ertel W (2014) Monte Carlo Bayesian hierarchical reinforcement learning. In: Proceedings of the International Conference on Autonomous Agents and Multi-Agent Systems (AAMAS), pp. 1551\u20131552. International Foundation for Autonomous Agents and Multiagent Systems"},{"issue":"3","key":"742_CR50","doi-asserted-by":"crossref","first-page":"808","DOI":"10.1007\/s10489-014-0565-6","volume":"41","author":"NA Vien","year":"2014","unstructured":"Vien NA, Ngo H, Lee S, Ertel W (2014) Approximate planning for bayesian hierarchical reinforcement learning. Appl Intell 41(3):808\u2013819","journal-title":"Appl Intell"},{"key":"742_CR51","unstructured":"Vien NA, Toussaint M (2014) Model-based relational RL when object existence is partially observable. In: (ICML 2014)"},{"key":"742_CR52","unstructured":"Vien NA, Toussaint M (2015) Hierarchical Monte-Carlo planning. In: Proceedings of the Twenty-Ninth AAAI Conference on Artificial Intelligence, January 25-30, 2015, Austin, Texas, USA., p 3613\u20133619"},{"key":"742_CR53","doi-asserted-by":"crossref","unstructured":"Vien NA, Toussaint M (2015) POMDP manipulation via trajectory optimization. 
In: Proc. of the Int. Conf. on Intelligent Robots and Systems (IROS 2015)","DOI":"10.1109\/IROS.2015.7353381"},{"key":"742_CR54","doi-asserted-by":"crossref","unstructured":"Vien NA, Toussaint M (2015) Touch based POMDP manipulation via sequential submodular optimization. In: Proc. of the IEEE-RAS Int. Conf. on Humanoid Robots (Humanoids 2015)","DOI":"10.1109\/HUMANOIDS.2015.7363566"},{"key":"742_CR55","doi-asserted-by":"crossref","unstructured":"Vien NA, Viet NH, Lee S, Chung T (2007) Heuristic search based exploration in reinforcement learning. In: IWANN, p 110\u2013118","DOI":"10.1007\/978-3-540-73007-1_14"},{"key":"742_CR56","doi-asserted-by":"crossref","unstructured":"Vien NA, Viet NH, Lee S, Chung T (2007) Obstacle avoidance path planning for mobile robot based on ant-q reinforcement learning algorithm. In: ISNN (1), p 704\u2013713","DOI":"10.1007\/978-3-540-72383-7_83"},{"issue":"6","key":"742_CR57","doi-asserted-by":"crossref","first-page":"2008","DOI":"10.1587\/transcom.E92.B.2008","volume":"92-B","author":"NA Vien","year":"2009","unstructured":"Vien NA, Viet NH, Lee S, Chung T (2009) Policy gradient SMDP for resource allocation and routing in integrated services networks. IEICE Trans 92-B(6):2008\u20132022","journal-title":"IEICE Trans"},{"key":"742_CR58","doi-asserted-by":"crossref","unstructured":"Vien NA, Viet NH, Park H, Lee S, Chung T (2007) Q-learning based univector field navigation method for mobile robots. In: Khaled E (ed) Advances and Innovations in Systems, Computing Sciences and Software Engineering, p 463\u2013468","DOI":"10.1007\/978-1-4020-6264-3_80"},{"issue":"9","key":"742_CR59","doi-asserted-by":"crossref","first-page":"1671","DOI":"10.1016\/j.ins.2011.01.001","volume":"181","author":"NA Vien","year":"2011","unstructured":"Vien NA, Yu H, Chung T (2011) Hessian matrix distribution for Bayesian policy gradient reinforcement learning. 
Inf Sci 181(9):1671\u20131685","journal-title":"Inf Sci"},{"key":"742_CR60","doi-asserted-by":"crossref","unstructured":"Viet NH, Vien NA, Chung T (2008) Policy gradient SMDP for resource allocation and routing in integrated services networks. In: ICNSC, p 1541\u20131546","DOI":"10.1109\/ICNSC.2008.4525466"},{"key":"742_CR61","doi-asserted-by":"crossref","unstructured":"Viet NH, Vien NA, Lee S, Chung T (2008) Obstacle avoidance path planning for mobile robot based on multi colony ant algorithm. In: ACHI, p 285\u2013289","DOI":"10.1109\/ACHI.2008.42"},{"key":"742_CR62","doi-asserted-by":"crossref","unstructured":"Wang T, Lizotte DJ, Bowling MH, Schuurmans D (2005) Bayesian sparse sampling for on-line reward optimization. In: Proceedings of the International Conference on Machine Learning, p 956\u2013963","DOI":"10.1145\/1102351.1102472"},{"key":"742_CR63","unstructured":"Wang Y, Won KS, Hsu D, Lee WS (2010) Monte Carlo Bayesian reinforcement learning. In: Proceedings of the International Conference on Machine Learning"},{"issue":"2","key":"742_CR64","doi-asserted-by":"crossref","first-page":"348","DOI":"10.1287\/opre.24.2.348","volume":"24","author":"CC White","year":"1976","unstructured":"White CC (1976) Procedures for the solution of a finite-horizon, partially observed, semi-Markov optimization problem. Oper Res 24(2):348\u2013358","journal-title":"Oper Res"},{"key":"742_CR65","unstructured":"Zhang W, Dietterich TG (1995) A reinforcement learning approach to job-shop scheduling. 
In: International Joint Conferences on Artificial Intelligence, p 1114\u20131120"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-015-0742-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10489-015-0742-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-015-0742-2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,16]],"date-time":"2023-08-16T17:26:57Z","timestamp":1692206817000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10489-015-0742-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,1,29]]},"references-count":65,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2016,7]]}},"alternative-id":["742"],"URL":"https:\/\/doi.org\/10.1007\/s10489-015-0742-2","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,1,29]]}}}