{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,4,1]],"date-time":"2022-04-01T20:24:37Z","timestamp":1648844677484},"reference-count":13,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2002,12,1]],"date-time":"2002-12-01T00:00:00Z","timestamp":1038700800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Artif Life Robotics"],"published-print":{"date-parts":[[2002,12]]},"DOI":"10.1007\/bf02481264","type":"journal-article","created":{"date-parts":[[2006,8,15]],"date-time":"2006-08-15T11:33:40Z","timestamp":1155641620000},"page":"181-184","source":"Crossref","is-referenced-by-count":0,"title":["Labeling Q-learning in hidden state environments"],"prefix":"10.1007","volume":"6","author":[{"given":"Hae-Yeon","family":"Lee","sequence":"first","affiliation":[]},{"given":"Hiroyuki","family":"Kamaya","sequence":"additional","affiliation":[]},{"given":"Ken-ichi","family":"Abe","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"BF02481264_CR1","unstructured":"Lin L, Mitchell TM (1992) Reinforcement learning with hidden states. Proceedings of the Second International Conference on Simulation of Adaptive Behavior, p 271\u2013280"},{"key":"BF02481264_CR2","doi-asserted-by":"crossref","first-page":"464","DOI":"10.1109\/3477.499796","volume":"26","author":"A McCallum","year":"1996","unstructured":"McCallum A (1996) Hidden state and reinforcement learning with instance-based state identification. IEEE Trans Syst Man Cybern B 26:464\u2013473","journal-title":"IEEE Trans Syst Man Cybern B"},{"key":"BF02481264_CR3","doi-asserted-by":"crossref","first-page":"219","DOI":"10.1177\/105971239700600202","volume":"6","author":"M Wiering","year":"1997","unstructured":"Wiering M, Schmidhuber J (1997) HQ-learning. Adapt Behav 6:219\u2013246","journal-title":"Adapt Behav"},{"key":"BF02481264_CR4","doi-asserted-by":"crossref","first-page":"403","DOI":"10.1109\/3477.846230","volume":"30","author":"S Ron","year":"2000","unstructured":"Ron S, Session C (2000) Self-segmentation of sequences: automatic formation of hierarchies of sequential behaviors. IEEE Trans Syst Man Cybern B30:403\u2013418","journal-title":"IEEE Trans Syst Man Cybern B"},{"key":"BF02481264_CR5","unstructured":"Lee H-Y, Kamaya H, Abe K (1999) Labeling Q-learning for non-Markovian environments. Proceedings of the IEEE International Conference on System, Man, and Cybernetics (SMC'99), Tokyo, October 12\u201315, 1999, p. 487\u2013491"},{"key":"BF02481264_CR6","unstructured":"Lee H-Y, Kamaya H, Abe K (2000) Labeling Q-learning for partially observable Markov decision process environments. Proceedings of the International Symposium on Artificial Life and Robotics (AROB5th'00), Oita, Japan, January 26\u201328, 2000, p 484\u2013487"},{"key":"BF02481264_CR7","unstructured":"Lee H-Y, Kamaya H, Abe K (2000) Labeling Q-learning for maze problems with partially observable states. 5th Korean Automatic Control Conference (KACC2000), Yong-In, Korea, October 19\u201321, 2000, p 489\u2013492"},{"key":"BF02481264_CR8","first-page":"279","volume":"8","author":"CJCH Watkins","year":"1992","unstructured":"Watkins CJCH, Dayan P (1992) Q-learning. Mach Learn 8:279\u2013292","journal-title":"Mach Learn"},{"key":"BF02481264_CR9","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"1988","unstructured":"Sutton RS, Barto AG (1988) Reinforcement learning: an introduction. MIT Press, Cambridge"},{"key":"BF02481264_CR10","first-page":"341","volume":"8","author":"PD Dayan","year":"1992","unstructured":"Dayan PD (1992) The convergence of TD() for general \u03b3. Mach Learn 8:341\u2013362","journal-title":"Mach Learn"},{"key":"BF02481264_CR11","first-page":"9","volume":"3","author":"RS Sutton","year":"1998","unstructured":"Sutton RS (1998) Learning to predict by the methods of temporal differences. Mach Learn 3:9\u201344","journal-title":"Mach Learn"},{"key":"BF02481264_CR12","unstructured":"Loch J, Singh S (1998) Using eligibility traces to find the best memoryless policy in partially observable Markov decision processes. ICML-98"},{"key":"BF02481264_CR13","first-page":"123","volume":"22","author":"SP Singh","year":"1996","unstructured":"Singh SP, Sutton RS (1996) Reinforcement learning with replacing eligibility traces. Mach Learn 22:123\u2013158","journal-title":"Mach Learn"}],"container-title":["Artificial Life and Robotics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/BF02481264.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/BF02481264\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/BF02481264","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,17]],"date-time":"2019-05-17T17:21:46Z","timestamp":1558113706000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/BF02481264"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2002,12]]},"references-count":13,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2002,12]]}},"alternative-id":["BF02481264"],"URL":"https:\/\/doi.org\/10.1007\/bf02481264","relation":{},"ISSN":["1433-5298","1614-7456"],"issn-type":[{"value":"1433-5298","type":"print"},{"value":"1614-7456","type":"electronic"}],"subject":[],"published":{"date-parts":[[2002,12]]}}}