{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T08:21:48Z","timestamp":1760170908314,"version":"3.37.3"},"reference-count":37,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"8","license":[{"start":{"date-parts":[[2015,8,1]],"date-time":"2015-08-01T00:00:00Z","timestamp":1438387200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/501100004410","name":"T\u00fcrkiye Bilimsel ve Teknolojik Ara\u015ftirma Kurumu","doi-asserted-by":"publisher","award":["113E239"],"award-info":[{"award-number":["113E239"]}],"id":[{"id":"10.13039\/501100004410","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cybern."],"published-print":{"date-parts":[[2015,8]]},"DOI":"10.1109\/tcyb.2014.2352038","type":"journal-article","created":{"date-parts":[[2014,9,9]],"date-time":"2014-09-09T18:41:22Z","timestamp":1410288082000},"page":"1414-1425","source":"Crossref","is-referenced-by-count":11,"title":["Toward Generalization of Automated Temporal Abstraction to Partially Observable Reinforcement Learning"],"prefix":"10.1109","volume":"45","author":[{"given":"Erkin","family":"Cilden","sequence":"first","affiliation":[]},{"given":"Faruk","family":"Polat","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"article-title":"Tractable planning under uncertainty: Exploiting structure","year":"2004","author":"pineau","key":"ref33"},{"article-title":"Finding approximate POMDP solutions through belief compression","year":"2003","author":"roy","key":"ref32"},{"key":"ref31","first-page":"348","article-title":"Abstraction in model based partially observable reinforcement learning using extended sequence trees","volume":"2","year":"2012","journal-title":"Proc 
IEEE\/WIC\/ACM Int Conf Web Intell Intell Agent Technol"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.1996.571080"},{"key":"ref37","first-page":"1088","article-title":"Approximating optimal policies for partially observable stochastic domains","volume":"2","author":"parr","year":"1995","journal-title":"Proc 14th Int Joint Conf Artif Intell"},{"key":"ref36","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1613\/jair.639","article-title":"Hierarchical reinforcement learning with the MAXQ value function decomposition","volume":"13","author":"dietterich","year":"2000","journal-title":"J Artif Intell Res"},{"key":"ref35","first-page":"520","article-title":"Heuristic search value iteration for POMDPs","author":"smith","year":"2004","journal-title":"Proc 20th Conf Uncertainty Artif Intell"},{"article-title":"Exact and approximate algorithms for partially observable Markov decision processes","year":"1998","author":"cassandra","key":"ref34"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(98)00023-X"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","article-title":"Reinforcement learning: A survey","volume":"4","author":"kaelbling","year":"1996","journal-title":"J Artif Intell Res"},{"key":"ref12","first-page":"775","article-title":"Approximate planning in POMDPs with macro-actions","volume":"16","author":"theocharous","year":"2004","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/SICE.2007.4421430"},{"journal-title":"Dynamic Programming","year":"1957","author":"bellman","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/BF00115009"},{"article-title":"Learning from delayed rewards","year":"1989","author":"watkins","key":"ref16"},{"key":"ref17","first-page":"393","article-title":"Reinforcement learning methods for continuous-time Markov decision 
problems","volume":"7","author":"bradtke","year":"1994","journal-title":"Proc Adv Neural Inf Process Syst"},{"article-title":"Hierarchical control and learning for Markov decision processes","year":"1998","author":"parr","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref28","first-page":"707","article-title":"An improved grid-based approximation algorithm for POMDPs","volume":"1","author":"zhou","year":"2001","journal-title":"Proc 17th Int Joint Conf Artif Intell"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45622-8_16"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/3477.846230"},{"key":"ref3","first-page":"361","article-title":"Automatic discovery of subgoals in reinforcement learning using diverse density","author":"mcgovern","year":"2001","journal-title":"Proc 18th Int Conf Mach Learn"},{"key":"ref6","first-page":"95","article-title":"Using relative novelty to identify useful temporal abstractions in reinforcement learning","author":"\u015fim\u015fek","year":"2004","journal-title":"Proc 21st Int Conf Mach Learn"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1287\/opre.39.1.162"},{"key":"ref5","first-page":"295","article-title":"Q-cut\u2014Dynamic discovery of sub-goals in reinforcement learning","author":"menache","year":"2002","journal-title":"Proc 13th Eur Conf Mach Learn"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2007.899419"},{"key":"ref7","article-title":"acQuire-macros: An algorithm for automatically learning macro-actions","author":"mcgovern","year":"1998","journal-title":"Proc Neural Inf Process Syst Conf Workshop Abstraction Hierarchy Reinforcement Learn"},{"key":"ref2","first-page":"243","article-title":"Discovering hierarchy in reinforcement learning with HEXQ","author":"hengst","year":"2002","journal-title":"Proc 19th Int Conf Mach 
Learn"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-010-5182-y"},{"journal-title":"Reinforcement Learning An Introduction","year":"1998","author":"sutton","key":"ref1"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1023\/A:1025696116075"},{"article-title":"Reinforcement learning with selective perception and hidden state","year":"1996","author":"mccallum","key":"ref22"},{"key":"ref21","first-page":"183","article-title":"Reinforcement learning with perceptual aliasing: The perceptual distinctions approach","author":"chrisman","year":"1992","journal-title":"Proc Nat Conf Artif Intell"},{"article-title":"Reinforcement learning for robots using neural networks","year":"1993","author":"lin","key":"ref24"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/3477.499796"},{"key":"ref26","first-page":"225","article-title":"Automated hierarchy discovery for planning in partially observable environments","volume":"19","author":"charlin","year":"2007","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref25","first-page":"495","article-title":"Learning policies for partially observable environments: Scaling up","author":"littman","year":"1998","journal-title":"Readings in Agents"}],"container-title":["IEEE Transactions on 
Cybernetics"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6221036\/7156182\/06894577.pdf?arnumber=6894577","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T16:04:15Z","timestamp":1642003455000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/6894577\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,8]]},"references-count":37,"journal-issue":{"issue":"8"},"URL":"https:\/\/doi.org\/10.1109\/tcyb.2014.2352038","relation":{},"ISSN":["2168-2267","2168-2275"],"issn-type":[{"type":"print","value":"2168-2267"},{"type":"electronic","value":"2168-2275"}],"subject":[],"published":{"date-parts":[[2015,8]]}}}