{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,1,10]],"date-time":"2024-01-10T01:03:25Z","timestamp":1704848605866},"reference-count":62,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2014,7,20]],"date-time":"2014-07-20T00:00:00Z","timestamp":1405814400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2014,10]]},"DOI":"10.1007\/s10489-014-0565-6","type":"journal-article","created":{"date-parts":[[2014,7,19]],"date-time":"2014-07-19T04:54:07Z","timestamp":1405745647000},"page":"808-819","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Approximate planning for bayesian hierarchical reinforcement learning"],"prefix":"10.1007","volume":"41","author":[{"given":"Ngo Anh","family":"Vien","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hung","family":"Ngo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sungyoung","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"TaeChoong","family":"Chung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2014,7,20]]},"reference":[{"key":"565_CR1","unstructured":"Abbeel P, Coates A, Quigley M, Ng AY (2006) An application of reinforcement learning to aerobatic helicopter flight. 
In: Advances in neural information processing systems (NIPS), pp 1\u20138"},{"issue":"2","key":"565_CR2","doi-asserted-by":"crossref","first-page":"201","DOI":"10.1007\/s10489-013-0455-3","volume":"40","author":"M Abdoos","year":"2014","unstructured":"Abdoos M, Mozayani N, Bazzan ALC (2014) Hierarchical control of traffic signals using q-learning with tile coding. Appl Intell 40(2):201\u2013213","journal-title":"Appl Intell"},{"key":"565_CR3","unstructured":"Asmuth J, Littman ML (2011) Learning is planning: near Bayes-optimal reinforcement learning via Monte-Carlo tree search. In: UAI, pp 19\u201326"},{"key":"565_CR4","unstructured":"Atkeson CG (1997) Nonparametric model-based reinforcement learning. In: Advances in neural information processing systems (NIPS)"},{"key":"565_CR5","doi-asserted-by":"crossref","unstructured":"Bai H, Hsu D, Lee WS, Vien NA (2010) Monte Carlo value iteration for continuous-state POMDPs. In: Algorithmic foundations of robotics IX, pp 175\u2013191","DOI":"10.1007\/978-3-642-17452-0_11"},{"issue":"4","key":"565_CR6","doi-asserted-by":"crossref","first-page":"341","DOI":"10.1023\/A:1025696116075","volume":"13","author":"AG Barto","year":"2003","unstructured":"Barto AG, Mahadevan S (2003) Recent advances in hierarchical reinforcement learning. Discrete Event Dyn Syst 13(4):341\u2013379","journal-title":"Discrete Event Dyn Syst"},{"issue":"3","key":"565_CR7","doi-asserted-by":"crossref","first-page":"243","DOI":"10.1023\/A:1007634325138","volume":"40","author":"J Baxter","year":"2000","unstructured":"Baxter J, Tridgell A, Weaver L (2000) Learning to play chess using temporal differences. Mach Learn 40(3):243\u2013263","journal-title":"Mach Learn"},{"key":"565_CR8","unstructured":"Cao F, Ray S (2012) Bayesian hierarchical reinforcement learning. 
In: Bartlett P, Pereira F, Burges C, Bottou L, Weinberger K (eds) Advances in neural information processing systems (NIPS), pp 73\u201381"},{"key":"565_CR9","unstructured":"Castro PS, Precup D (2007) Using linear programming for Bayesian exploration in Markov decision processes. In: IJCAI, pp 2437\u20132442"},{"key":"565_CR10","unstructured":"Dearden R, Friedman N, Russell SJ (1998) Bayesian Q-learning. In: AAAI, pp 761\u2013768"},{"key":"565_CR11","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1613\/jair.639","volume":"13","author":"TG Dietterich","year":"2000","unstructured":"Dietterich TG (2000) Hierarchical reinforcement learning with the MAXQ value function decomposition. J Artif Intell Res (JAIR) 13:227\u2013303","journal-title":"J Artif Intell Res (JAIR)"},{"key":"565_CR12","unstructured":"Duff M (2002) Optimal learning: Computational procedures for Bayes-adaptive Markov decision processes. PhD thesis, University of Massachusetts Amherst"},{"key":"565_CR13","unstructured":"Engel Y, Mannor S, Meir R (2003) Bayes meets Bellman: The Gaussian process approach to temporal difference learning. In: Proceedings of the international conference on machine learning, pp 154\u2013161"},{"key":"565_CR14","doi-asserted-by":"crossref","unstructured":"Engel Y, Mannor S, Meir R (2005) Reinforcement learning with Gaussian processes. In: Proceedings of the International Conference on Machine Learning, pp 201\u2013208","DOI":"10.1145\/1102351.1102377"},{"key":"565_CR15","unstructured":"Furmston T, Barber D (2010) Variational methods for reinforcement learning. In: AISTATS, pp 241\u2013248"},{"key":"565_CR16","unstructured":"Ghavamzadeh M, Engel Y (2006) Bayesian policy gradient algorithms. In: Advances in neural information processing systems (NIPS), pp 457\u2013464"},{"key":"565_CR17","doi-asserted-by":"crossref","unstructured":"Ghavamzadeh M, Engel Y (2007) Bayesian actor-critic algorithms. 
In: Proceedings of the international conference on machine learning, pp 297\u2013304","DOI":"10.1145\/1273496.1273534"},{"key":"565_CR18","doi-asserted-by":"crossref","unstructured":"Granmo OC, Glimsdal S (2012) Accelerated Bayesian learning for decentralized two-armed bandit based decision making with applications to the goore game. Appl Intell","DOI":"10.1007\/s10489-012-0346-z"},{"key":"565_CR19","unstructured":"Guez A, Silver D, Dayan P (2012) Efficient Bayes-adaptive reinforcement learning using sample-based search. In: Advances in neural information processing systems (NIPS), pp 1034\u20131042"},{"key":"565_CR20","unstructured":"Hauskrecht M, Meuleau N, Kaelbling LP, Dean T, Boutilier C (1998) Hierarchical solution of Markov decision processes using macro-actions. In: UAI, pp 220\u2013229"},{"key":"565_CR21","doi-asserted-by":"crossref","unstructured":"He R, Brunskill E, Roy N (2010) PUMA: Planning under uncertainty with macro-actions. In: Proceedings of the association for the advancement of artificial intelligence (AAAI)","DOI":"10.1613\/jair.3171"},{"issue":"1","key":"565_CR22","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1023\/B:APIN.0000011143.95085.74","volume":"20","author":"J Hong","year":"2004","unstructured":"Hong J, Prabhu VV (2004) Distributed reinforcement learning control for batch sequencing and sizing in just-in-time manufacturing systems. Applied Intelligence 20(1):71\u201387","journal-title":"Applied Intelligence"},{"issue":"1","key":"565_CR23","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1007\/s10489-008-0115-1","volume":"31","author":"A Iglesias","year":"2009","unstructured":"Iglesias A, Mart\u00ednez P, Aler R, Fern\u00e1ndez F (2009) Learning teaching strategies in an adaptive and intelligent educational system through reinforcement learning. 
Appl Intell 31(1):89\u2013106","journal-title":"Appl Intell"},{"key":"565_CR24","doi-asserted-by":"crossref","unstructured":"Jong NK, Stone P (2008) Hierarchical model-based reinforcement learning: Rmax + MAXQ. In: Proceedings of the international","DOI":"10.1145\/1390156.1390211"},{"issue":"2","key":"565_CR25","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/s10489-009-0191-x","volume":"34","author":"J Li","year":"2011","unstructured":"Li J, Li Z, Chen J (2011) Microassembly path planning using reinforcement learning for improving positioning accuracy of a 1 cm3 omni-directional mobile microrobot. Appl Intell 34(2):211\u2013225","journal-title":"Appl Intell"},{"key":"565_CR26","unstructured":"Lim ZW, Hsu D, Sun LW (2011) Monte Carlo value iteration with macro-actions. In: Advances in neural information processing systems (NIPS), pp 1287\u20131295"},{"key":"565_CR27","unstructured":"Ngo H, Luciw M, F\u00f6rster A, Schmidhuber J (2012) Learning skills from play: Artificial curiosity on a Katana robot arm. In: Proceedings of the international joint conference of neural networks (IJCNN)"},{"key":"565_CR28","doi-asserted-by":"crossref","unstructured":"Ngo H, Luciw M, F\u00f6rster A, Schmidhuber J (2013) Confidence-based progress-driven self-generated goals for skill acquisition in developmental robots. Front Psychol 4","DOI":"10.3389\/fpsyg.2013.00833"},{"key":"565_CR29","doi-asserted-by":"crossref","unstructured":"Pakizeh E, Palhang M, Pedram MM (2012) Multi-criteria expertness based cooperative Q-learning. Appl Intell","DOI":"10.1007\/s10489-012-0392-6"},{"key":"565_CR30","volume-title":"Tractable planning under uncertainty: exploiting structure. Ph.D. thesis","author":"J Pineau","year":"2004","unstructured":"Pineau J (2004) Tractable planning under uncertainty: exploiting structure. Ph.D. thesis. Robotics Institute, Carnegie Mellon University"},{"key":"565_CR31","volume-title":"An integrated approach to hierarchy and abstraction for POMDPs. Tech. 
rep.","author":"J Pineau","year":"2001","unstructured":"Pineau J, Thrun S (2001) An integrated approach to hierarchy and abstraction for POMDPs. Tech. rep. Carnegie Mellon University, Robotics Institute"},{"key":"565_CR32","first-page":"2329","volume":"7","author":"JM Porta","year":"2006","unstructured":"Porta JM, Vlassis NA, Spaan MTJ, Poupart P (2006) Point-based value iteration for continuous POMDPs. JMLR 7:2329\u20132367","journal-title":"JMLR"},{"key":"565_CR33","doi-asserted-by":"crossref","unstructured":"Poupart P, Vlassis NA, Hoey J, Regan K (2006) An analytic solution to discrete Bayesian reinforcement learning. In: Proceedings of the international conference on machine learning, pp 697\u2013704","DOI":"10.1145\/1143844.1143932"},{"key":"565_CR34","unstructured":"Ross S, Chaib-draa B, Pineau J (2007) Bayes-adaptive POMDPs. In: Advances in neural information processing systems (NIPS)"},{"key":"565_CR35","unstructured":"Ross S, Pineau J Model-based bayesian reinforcement learning in large structured domains. In: UAI, pp. 476\u2013483, (2008)"},{"issue":"3","key":"565_CR36","doi-asserted-by":"crossref","first-page":"210","DOI":"10.1147\/rd.33.0210","volume":"3","author":"AL Samuel","year":"1959","unstructured":"Samuel AL (1959) Some studies in machine learning using the game of checkers. IBM J Res Dev 3(3):210\u2013229","journal-title":"IBM J Res Dev"},{"key":"565_CR37","unstructured":"Singh SP, Bertsekas D (1996) Reinforcement learning for dynamic channel allocation in cellular telephone systems. In: Advances in neural information processing systems (NIPS), pp 974\u2013980"},{"key":"565_CR38","unstructured":"Strens MJA (2000) A Bayesian framework for reinforcement learning. 
In: Proceedings of the international conference on machine learning, pp 943\u2013950"},{"issue":"7-8","key":"565_CR39","doi-asserted-by":"crossref","first-page":"2039","DOI":"10.1007\/s00521-013-1445-4","volume":"23","author":"S Sun","year":"2013","unstructured":"Sun S (2013) A review of deterministic approximate inference techniques for Bayesian machine learning. Neural Comput Applic 23(7-8):2039\u20132050","journal-title":"Neural Comput Applic"},{"key":"565_CR40","volume-title":"Reinforcement learning: An introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: An introduction. MIT Press, Cambridge, MA"},{"issue":"1-2","key":"565_CR41","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton RS, Precup D, Singh SP (1999) Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning. Artif Intell 112(1-2):181\u2013211","journal-title":"Artif Intell"},{"issue":"1","key":"565_CR42","doi-asserted-by":"crossref","first-page":"1","DOI":"10.2200\/S00268ED1V01Y201005AIM009","volume":"4","author":"C Szepesv\u00e1ri","year":"2010","unstructured":"Szepesv\u00e1ri C (2010) Algorithms for reinforcement learning. Synth Lect Artif Intell Mach Learn 4(1):1\u2013103","journal-title":"Synth Lect Artif Intell Mach Learn"},{"key":"565_CR43","first-page":"257","volume":"8","author":"G Tesauro","year":"1992","unstructured":"Tesauro G (1992) Practical issues in temporal difference learning. Mach Learn 8:257\u2013277","journal-title":"Mach Learn"},{"issue":"2","key":"565_CR44","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1162\/neco.1994.6.2.215","volume":"6","author":"G Tesauro","year":"1994","unstructured":"Tesauro G (1994) TD-Gammon, a self-teaching backgammon program, achieves master-level play. 
Neural Comput 6(2):215\u2013219","journal-title":"Neural Comput"},{"issue":"3","key":"565_CR45","doi-asserted-by":"crossref","first-page":"58","DOI":"10.1145\/203330.203343","volume":"38","author":"G Tesauro","year":"1995","unstructured":"Tesauro G (1995) Temporal difference learning and TD-Gammon. Commun ACM 38(3):58\u201368","journal-title":"Commun ACM"},{"key":"565_CR46","unstructured":"Strens MJA (2000) A Bayesian framework for reinforcement learning. In: Proceedings of the international conference on machine learning, pp 943\u2013950"},{"key":"565_CR47","unstructured":"Turkett WH Robust multiagent plan generation and execution with decision theoretic planners. Ph.D. thesis, Department of Computer Science and Engineering, University of South Carolina (1998)"},{"key":"565_CR48","doi-asserted-by":"crossref","unstructured":"Vien NA, Chung T (2007) Natural gradient policy for average cost SMDP problem. In: Proceedings of the IEEE international conference on tools with artificial intelligence, pp 11\u201318","DOI":"10.1109\/ICTAI.2007.12"},{"key":"565_CR49","unstructured":"Vien NA, Chung T (2008) Policy gradient semi-Markov decision process. In: Proceedings of the IEEE international conference on tools with artificial intelligence, pp 11\u201318"},{"key":"565_CR50","doi-asserted-by":"crossref","unstructured":"Vien NA, Ertel W, Chung T (2013) Learning via human feedback in continuous state and action spaces. Appl Intell 39(2)","DOI":"10.1007\/s10489-012-0412-6"},{"issue":"2","key":"565_CR51","doi-asserted-by":"crossref","first-page":"345","DOI":"10.1007\/s10489-012-0416-2","volume":"39","author":"NA Vien","year":"2013","unstructured":"Vien NA, Ertel W, Dang VH, Chung T (2013) Monte-Carlo tree search for Bayesian reinforcement learning. Appl Intell 39(2):345\u2013353","journal-title":"Appl Intell"},{"key":"565_CR52","unstructured":"Vien NA, Ngo H, Ertel W (2014) Monte Carlo Bayesian hierarchical reinforcement learning. 
In: Proceedings of the international conference on autonomous agents and multi-agent systems (AAMAS), pp 1551\u20131552. International Foundation for Autonomous Agents and Multiagent Systems (2014)"},{"key":"565_CR53","doi-asserted-by":"crossref","unstructured":"Vien NA, Viet NH, Lee S, Chung T (2007) Heuristic search based exploration in reinforcement learning. In: IWANN, pp 110\u2013118","DOI":"10.1007\/978-3-540-73007-1_14"},{"key":"565_CR54","doi-asserted-by":"crossref","unstructured":"Vien NA, Viet NH, Lee S, Chung T (2007) Obstacle avoidance path planning for mobile robot based on ant-q reinforcement learning algorithm. In: ISNN (1), pp 704\u2013713","DOI":"10.1007\/978-3-540-72383-7_83"},{"issue":"6","key":"565_CR55","doi-asserted-by":"crossref","first-page":"2008","DOI":"10.1587\/transcom.E92.B.2008","volume":"92-B","author":"NA Vien","year":"2009","unstructured":"Vien NA, Viet NH, Lee S, Chung T (2009) Policy gradient SMDP for resource allocation and routing in integrated services networks. IEICE Trans 92-B (6):2008\u20132022","journal-title":"IEICE Trans"},{"issue":"9","key":"565_CR56","doi-asserted-by":"crossref","first-page":"1671","DOI":"10.1016\/j.ins.2011.01.001","volume":"181","author":"NA Vien","year":"2011","unstructured":"Vien NA, Yu H, Chung T (2011) Hessian matrix distribution for Bayesian policy gradient reinforcement learning. Info Sci 181(9):1671\u20131685","journal-title":"Info Sci"},{"key":"565_CR57","doi-asserted-by":"crossref","unstructured":"Viet NH, Vien NA, Chung T (2008) Policy gradient SMDP for resource allocation and routing in integrated services networks. In: ICNSC, pp 1541\u20131546","DOI":"10.1109\/ICNSC.2008.4525466"},{"key":"565_CR58","doi-asserted-by":"crossref","unstructured":"Wang T, Lizotte DJ, Bowling MH, Schuurmans D (2005) Bayesian sparse sampling for on-line reward optimization. 
In: Proceedings of the international conference on machine learning, pp 956\u2013963","DOI":"10.1145\/1102351.1102472"},{"key":"565_CR59","unstructured":"Wang Y, Won KS, Hsu D, Lee WS (2010) Monte Carlo Bayesian reinforcement learning. In: Proceedings of the international conference on machine learning"},{"issue":"2","key":"565_CR60","doi-asserted-by":"crossref","first-page":"348","DOI":"10.1287\/opre.24.2.348","volume":"24","author":"CC White","year":"1976","unstructured":"White CC (1976) Procedures for the solution of a finite-horizon, partially observed, semi-Markov optimization problem. Oper Res 24(2):348\u2013358","journal-title":"Oper Res"},{"key":"565_CR61","doi-asserted-by":"crossref","unstructured":"Wu B, Zheng HY, Feng YP (2014) Point-based online value iteration algorithm in large pomdp. Appl Intell:546\u2013555","DOI":"10.1007\/s10489-013-0479-8"},{"key":"565_CR62","unstructured":"Zhang W, Dietterich TG (1995) A reinforcement learning approach to job-shop scheduling. In: International joint conferences on artificial intelligence, pp 1114\u20131120"}],"container-title":["Applied 
Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-014-0565-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10489-014-0565-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-014-0565-6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,13]],"date-time":"2019-08-13T00:53:17Z","timestamp":1565657597000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10489-014-0565-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,7,20]]},"references-count":62,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2014,10]]}},"alternative-id":["565"],"URL":"https:\/\/doi.org\/10.1007\/s10489-014-0565-6","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,7,20]]}}}