{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T21:02:23Z","timestamp":1725570143748},"publisher-location":"London","reference-count":24,"publisher":"Springer London","isbn-type":[{"type":"print","value":"9780857291295"},{"type":"electronic","value":"9780857291301"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011]]},"DOI":"10.1007\/978-0-85729-130-1_13","type":"book-chapter","created":{"date-parts":[[2010,11,11]],"date-time":"2010-11-11T19:14:28Z","timestamp":1289502868000},"page":"179-192","source":"Crossref","is-referenced-by-count":0,"title":["On Reinforcement Memory for Non-Markovian Control"],"prefix":"10.1007","author":[{"given":"Hassab Elgawi","family":"Osman","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2010,10,29]]},"reference":[{"key":"13_CR1","volume-title":"\u201cReinforcement Learning: An introduction,\u201d","author":"R. Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: \u201cReinforcement Learning: An introduction,\u201d. Cambring, MA: MIT Press (1998)."},{"key":"13_CR2","first-page":"215","volume-title":"Models of Information Processing in the Basal Ganglia","author":"A. Barto","year":"1995","unstructured":"Barto A.: \u201cAdaptive critics and the basal ganglia,\u201d. In: Models of Information Processing in the Basal Ganglia, pp.215-232. Cambridge, MA: MIT Press (1995)."},{"issue":"3","key":"13_CR3","doi-asserted-by":"publisher","first-page":"871","DOI":"10.1016\/S0306-4522(98)00697-6","volume":"91","author":"R. Suri","year":"1999","unstructured":"Suri, R., Schultz, W.: \u201cA neural network model with dopamine-like reinforcement signal that learns a spatial delayed response task,\u201d. In: Neuroscience\n                91(3):871-890 (1999).","journal-title":"Neuroscience"},{"key":"13_CR4","doi-asserted-by":"publisher","first-page":"841","DOI":"10.1162\/089976601300014376","volume":"13","author":"R. Suri","year":"2001","unstructured":"Suri, R., Schultz, W.: \u201cTemporal difference model reproduces anticipatory neural activity,\u201d. In: Neural Computation\n                13:841-862 (2001).","journal-title":"Neural Computation"},{"key":"13_CR5","unstructured":"Chrisman, L.: \u201cReinforcement learning with perceptual aliasing: The perceptual distinctions approach,\u201d. In: Proc. Int\u2019l. Conf on AAAI, pp.183-188 (1992)."},{"key":"13_CR6","unstructured":"Cassandra, A., Kaelbling, L., Littman, M.: \u201cActing optimally in partially observable stochastic domains,\u201d. In: Proc. Int\u2019l. Conf on AAAI, pp.1023-1028 (1994)."},{"key":"13_CR7","unstructured":"Sutton, R., McAllester, D., Singh, S., Mansour, Y.: \u201cPolicy gradient methods for reinforcement learning with function approximation,\u201d. In: Advances in Neural Information Processing Systems 12, pp. 1057-1063. MIT Press (2000)."},{"key":"13_CR8","unstructured":"Aberdeen, D., Baxter, J.: \u201cScalable Internal-State Policy-Gradient Methods for POMDPs,\u201d. In: Proc. of 19th Int\u2019l Conf. on Machine Learning\n                12, pp.3-10. Morgan Kaufmann Publishers Inc. (2002)."},{"key":"13_CR9","first-page":"59","volume":"22","author":"J. Tsitsiklis","year":"1996","unstructured":"Tsitsiklis, J., Van Roy, B.: \u201cFeatured-based methods for large scale dynamic programming,\u201d. In: Machine Learning\n                22:59-94 (1996).","journal-title":"Machine Learning"},{"key":"13_CR10","unstructured":"Hassab Elgawi, O.: \u201cRL-Based Memory Controller for Scalable Autonomous Systems,\u201d In: Advances in Neuro-Information Processing, Chi-Sing Leung, Minho Lee, Jonathan Hoyin Chan (Eds.), Part II, LNCS 5864, pp.83-92, (2009)."},{"key":"13_CR11","doi-asserted-by":"publisher","first-page":"2062","DOI":"10.1162\/neco.2006.18.9.2062","volume":"18","author":"J. Basak","year":"2004","unstructured":"Basak, J.: \u201cOnline adaptive decision trees: Pattern classification and function approximation,\u201d. Neural Comput\n                18:2062-2101 (2004).","journal-title":"Neural Comput"},{"key":"13_CR12","doi-asserted-by":"crossref","unstructured":"Hassab Elgawi, O.: \u201cOnline Random Forests based on CorrFS and CorrBE,\u201d In In: Proc. of Conf on Computer Vision and Pattern Recognition Workshop, CVPR, pp.1-7 (2008).","DOI":"10.1109\/CVPRW.2008.4563065"},{"key":"13_CR13","doi-asserted-by":"crossref","unstructured":"Ipek, E., Mutlu, O., Martinez, J., Caruana, R.: \u201cSelf-Optimizing Memory Controllers: A Reinforcement Learning Approach,\u201d. In: Intl. Symp. on Computer Architecture (ISCA), pp.39-50 (2008).","DOI":"10.1109\/ISCA.2008.21"},{"key":"13_CR14","unstructured":"Goldberger, J., Roweis, S., Hinton, G., Salakhutdinov, R.: \u201cNeighbourhood Components Analysis,\u201d. In: Advances in Neural Information Processing Systems\n                17, MIT Press, pp.513-520 (2005)."},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"Keller, P., Mannor, S., Precup, D.: \u201cAutomatic basis function construction for approximate dynamic programming and reinforcement learning,\u201d. In: 23rd International Conference on Machine Learning, pp.449-456 (2006).","DOI":"10.1145\/1143844.1143901"},{"key":"13_CR16","first-page":"287","volume-title":"Proc. of the Eighth Int\u2019l Conf. on Simulation of Adaptive Behavior: From Animals to Animats, 8","author":"E. Uchibe","year":"2006","unstructured":"Uchibe, E., Doya, K.: (2006) \u201cCompetitive-Cooperative-Concurrent Reinforcement Learning with Importance Sampling,\u201d. In: Proc. of the Eighth Int\u2019l Conf. on Simulation of Adaptive Behavior: From Animals to Animats, 8, MIT Press, Cambridge, MA, 2004, pp.287-296."},{"key":"13_CR17","first-page":"99","volume":"101","author":"P. Leslie","year":"1995","unstructured":"Leslie P., Michael L., Anthony R. \u201cPlanning and acting in partially observable stochastic domains,\u201d. Artificial Intelligence, 101:99-134 (1995).","journal-title":"Artificial Intelligence"},{"key":"13_CR18","unstructured":"Hassab Elgawi, O.: \u201cArchitecture of behavior-based Function Approximator for Adaptive Control,\u201d. In: Proc. 15th Int\u2019l. Conf on Neural Information Processing ICONIP, LNCS 5507, pp.104-111 (2008)."},{"issue":"2","key":"13_CR19","doi-asserted-by":"crossref","first-page":"155","DOI":"10.20965\/jaciii.2009.p0155","volume":"13","author":"O. Hassab Elgawi","year":"2009","unstructured":"Hassab Elgawi, O.: \u201cRandom-TD Function Approximator,\u201d In: Journal of Advanced Computational Intelligence and Intelligent Informatics (JACIII), 13(2):155-161 (2009).","journal-title":"Journal of Advanced Computational Intelligence and Intelligent Informatics (JACIII)"},{"key":"13_CR20","unstructured":"Meuleau, N., Peshkin, L., Kim, K.-E., Kaelbling, L.: \u201cLearning finite-state controllers for partially observable environments,\u201d. In: Proc of the 15th Int\u2019l Conf on Uncertainty in Artificial Intelligence, pp.427-436 (1999)."},{"key":"13_CR21","unstructured":"Peshkin, L., Meuleau, N., Kaelbling, L.: \u201cLearning policies with external memory,\u201d. In: Proc. of the 16th Int\u2019l Conf on Machine Learning, pp.307-314, I. Bratko and S. Dzeroski, (Eds.) (1999) On Reinforcement Memory for Non-markovian Control"},{"key":"13_CR22","unstructured":"Kenneth, O.: \u201cEfficient evolution of neural networks through complexification,\u201d. Ph.D. Thesis; Department of Computer Sciences, The University of Texas at Austin. Technical Report AITR-04-314 (2004)."},{"key":"13_CR23","unstructured":"Gomez, F.: \u201cRobust non-linear control through neuroevolution,\u201d. Ph.D. Thesis; Department of Computer Sciences, The University of Texas at Austin. Technical Report AI-TR-03-303 (2003)."},{"issue":"2","key":"13_CR24","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1177\/105971239700600201","volume":"6","author":"J. Santamaria","year":"1998","unstructured":"Santamaria, J., Sutton, R., Ram, A.: \u201cExperiments with reinforcement learning in problems with continuous state and action spaces,\u201d. In:Adaptive Behavior, 6(2):163-218 (1998).","journal-title":"Adaptive Behavior"}],"container-title":["Research and Development in Intelligent Systems XXVII"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-0-85729-130-1_13.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,29]],"date-time":"2021-04-29T04:39:58Z","timestamp":1619671198000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-0-85729-130-1_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,10,29]]},"ISBN":["9780857291295","9780857291301"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-0-85729-130-1_13","relation":{},"subject":[],"published":{"date-parts":[[2010,10,29]]}}}