{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T05:57:40Z","timestamp":1775109460576,"version":"3.50.1"},"reference-count":35,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2015,4,1]],"date-time":"2015-04-01T00:00:00Z","timestamp":1427846400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","award":["NRF-2012R1A1A2007881"],"award-info":[{"award-number":["NRF-2012R1A1A2007881"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]},{"name":"IT Research and Development Program of MKE\/KEIT","award":["10041678"],"award-info":[{"award-number":["10041678"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cybern."],"published-print":{"date-parts":[[2015,4]]},"DOI":"10.1109\/tcyb.2014.2336867","type":"journal-article","created":{"date-parts":[[2014,10,1]],"date-time":"2014-10-01T18:50:21Z","timestamp":1412189421000},"page":"793-805","source":"Crossref","is-referenced-by-count":28,"title":["Hierarchical Bayesian Inverse Reinforcement Learning"],"prefix":"10.1109","volume":"45","author":[{"family":"Jaedeug Choi","sequence":"first","affiliation":[]},{"family":"Kee-Eung Kim","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1613\/jair.639","article-title":"Hierarchical reinforcement learning with the MAXQ value function decomposition","volume":"13","author":"dietterich","year":"2000","journal-title":"J Artif Intell Res"},{"key":"ref32","author":"bertsekas","year":"1995","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref31","first-page":"1795","article-title":"Trip router with individualized preferences (TRIP): Incorporating personalization into route planning","author":"letchner","year":"2006","journal-title":"Proc 21st Nat Conf Artif Intell"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/COMSNETS.2009.4808865"},{"key":"ref35","doi-asserted-by":"crossref","first-page":"1863","DOI":"10.1109\/TNN.2011.2168422","article-title":"Hierarchical approximate policy iteration with binary-tree state space decomposition","volume":"22","author":"xu","year":"2011","journal-title":"IEEE Trans Neural Netw"},{"key":"ref34","first-page":"2629","article-title":"Hierarchical average reward reinforcement learning","volume":"8","author":"ghavamzadeh","year":"2007","journal-title":"J Mach Learn Res"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143936"},{"key":"ref11","first-page":"295","article-title":"Apprenticeship learning using inverse reinforcement learning and gradient methods","author":"neu","year":"2007","journal-title":"Proc 23rd Conf Uncertainty Artif Intell"},{"key":"ref12","first-page":"1433","article-title":"Maximum entropy inverse reinforcement learning","author":"ziebart","year":"2008","journal-title":"Proc 23rd AAAI Conf Artif Intell"},{"key":"ref13","first-page":"2586","article-title":"Bayesian inverse reinforcement learning","author":"ramachandran","year":"2007","journal-title":"Proc 20th Int Joint Conf Artif Intell"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.2307\/1884852"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.2307\/3003600"},{"key":"ref17","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/1654.001.0001","author":"gigerenzer","year":"2002","journal-title":"Bounded Rationality The Adaptive Toolbox"},{"key":"ref18","first-page":"1449","article-title":"A game-theoretic approach to apprenticeship learning","volume":"20","author":"syed","year":"2007","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref19","first-page":"34","article-title":"Preference elicitation and inverse reinforcement learning","author":"rothkopf","year":"2011","journal-title":"Proc 22nd Eur Conf Mach Learn"},{"key":"ref28","article-title":"Bootstrapping apprenticeship learning","volume":"23","author":"boularias","year":"2010","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/1409635.1409678"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390286"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1177\/0278364910371999"},{"key":"ref6","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2011-302","article-title":"User simulation in dialogue systems using inverse reinforcement learning","author":"chandramohan","year":"2011","journal-title":"Proc INTERSPEECH"},{"key":"ref29","author":"dixon","year":"2001","journal-title":"Surfing Economics Essays for the Inquiring Economist"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1287\/deca.1100.0185"},{"key":"ref8","article-title":"Improving hybrid vehicle fuel efficiency using inverse reinforcement learning","author":"vogel","year":"2012","journal-title":"Proc 26th AAAI Conf Artif Intell"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/1833349.1778859"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCA.2010.2055152"},{"key":"ref9","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","author":"ng","year":"2000","journal-title":"Proc 17th Int Conf Mach Learn"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/279943.279964"},{"key":"ref20","first-page":"1989","article-title":"MAP inference for Bayesian inverse reinforcement learning","volume":"24","author":"choi","year":"2011","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref22","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","author":"puterman","year":"1994","journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/3468.650327"},{"key":"ref24","doi-asserted-by":"crossref","DOI":"10.1093\/oso\/9780199251063.001.0001","author":"brocas","year":"2003","journal-title":"The Psychology of Economic Decisions"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.2.271"},{"key":"ref26","author":"casella","year":"2001","journal-title":"Statistical Inference"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1257\/000282803322655392"}],"container-title":["IEEE Transactions on Cybernetics"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6221036\/7060779\/06914557.pdf?arnumber=6914557","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T09:43:32Z","timestamp":1717407812000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6914557\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,4]]},"references-count":35,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tcyb.2014.2336867","relation":{},"ISSN":["2168-2267","2168-2275"],"issn-type":[{"value":"2168-2267","type":"print"},{"value":"2168-2275","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,4]]}}}