{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T07:51:42Z","timestamp":1760255502746},"reference-count":23,"publisher":"Elsevier","isbn-type":[{"value":"9781558603776","type":"print"}],"license":[{"start":{"date-parts":[[1995,1,1]],"date-time":"1995-01-01T00:00:00Z","timestamp":788918400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[1995]]},"DOI":"10.1016\/b978-1-55860-377-6.50044-x","type":"book-chapter","created":{"date-parts":[[2014,7,1]],"date-time":"2014-07-01T02:59:45Z","timestamp":1404183585000},"page":"295-303","source":"Crossref","is-referenced-by-count":29,"title":["Reinforcement Learning by Stochastic Hill Climbing on Discounted Reward"],"prefix":"10.1016","author":[{"given":"Hajime","family":"Kimura","sequence":"first","affiliation":[]},{"given":"Masayuki","family":"Yamamura","sequence":"additional","affiliation":[]},{"given":"Shigenobu","family":"Kobayashi","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib1","unstructured":"Baird, L. C. Reinforcement Learning in Continuous Time: Advantage Updating, Proc. of IEEE International Conference on Neural Networks Vol. IV, pp. 2448\u20132453(1994)."},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib2","unstructured":"Cassandra, A. R., Kaelbling, L. P., and Littman, M. L. Acting Optimally in Partially Observable Stochastic Domains, Proc. of 12th National Conference on Artificial Intelligence, Vol. 2, pp. 1023\u20131028(1994)."},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib3","unstructured":"Chrisman, L. Reinforcement learning with perceptual aliasing: The Perceptual Distinctions Approach, Proc. of 10th National Conference on Artificial Intelligence, pp. 183\u2013188(1992)."},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib4","doi-asserted-by":"crossref","first-page":"225","DOI":"10.1007\/BF00113898","article-title":"Credit Assignment in Rule Discovery Systems Based on Genetic Algorithms","volume":"3","author":"Grefenstette","year":"1988","journal-title":"Machine Learning"},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib5","first-page":"593","article-title":"Escaping brittleness","volume":"Volume II","author":"Holland","year":"1986"},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib6","unstructured":"Jaakkola, T., and Singh, S. P., and Jordan, M. I.: Reinforcement Learning Algorithm for Partially Observable Markov Decision Problems, Advances in Neural Information Processing Systems (NIPS-94)."},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib7","unstructured":"Kimura, H., Yamamura, M., and Kobayashi, S.: Reinforcement learning with delayed rewards on continuous state space, Proc. of the 3rd International Conference on Fuzzy Logic, Neural Nets and Soft Computing (Iizuka, Japan, August 1\u20137, 1994) p.p. 289\u2013292."},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib8","unstructured":"Liepins, G. E., Hilliard, M. R., Palmer, M., and Rangarajan, G.: Alternatives for Classifier System Credit Assignment, Eleventh International Joint Conference on Artificial Intelligent, pp. 756\u2013761(1989)."},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib9","doi-asserted-by":"crossref","unstructured":"Lin, L. Self-improving Reactive Agents: Case studies of Reinforcement Learning Framework. Proc. of 1st International Conference on Simulation of Adaptive Behavior, pp. 297\u2013305(1990).","DOI":"10.7551\/mitpress\/3115.003.0041"},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib10","doi-asserted-by":"crossref","unstructured":"Lin, L. Scaling Up Reinforcement Learning for Robot Control, Proc. of the tenth International Conference on Machine Learning, pp. 182\u2013189(1993).","DOI":"10.1016\/B978-1-55860-307-3.50030-7"},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib11","doi-asserted-by":"crossref","unstructured":"Littman, M.L. Markov games as a framework for multi-agent reinforcement learning, Proc. of 11th International Conference on Machine Learning, pp. 157\u2013163(1994).","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib12","doi-asserted-by":"crossref","unstructured":"McCallum, R.A. Overcoming Incomplete Perception with Utile Distinction Memory, Proc. of 10th International Conference on Machine Learning, pp. 190\u2013196(1993).","DOI":"10.1016\/B978-1-55860-307-3.50031-9"},{"issue":"No.4","key":"10.1016\/B978-1-55860-377-6.50044-X_bib13","article-title":"A theory of Profit Sharing in Reinforcement Learning","volume":"9","author":"Miyazaki","year":"1994","journal-title":"Journal of Japanese Society for Artificial Intelligence"},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib14","doi-asserted-by":"crossref","first-page":"323","DOI":"10.1109\/TSMC.1974.5408453","article-title":"Learning automata- A survey","volume":"SMC-4","author":"Narendra","year":"1974","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics"},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib15","doi-asserted-by":"crossref","unstructured":"Schwartz, A. A Reinforcement Learning Method for Maximizing Undiscounted Rewards, Proc. of 10th International Conference on Machine Learning, pp. 298\u2013305(1993).","DOI":"10.1016\/B978-1-55860-307-3.50045-9"},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib16","doi-asserted-by":"crossref","unstructured":"Singh, S.P., Jaakkola, T., and Jordan, M.I. Learning Without State-Estimation in Partially Observable Markovian Decision Processes, Proc. of 11th International Conference on Machine Learning, pp. 284\u2013292(1994).","DOI":"10.1016\/B978-1-55860-335-6.50042-8"},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib17","unstructured":"Singh, S.P. Reinforcement Learning Algorithms for Average-Pay off Markovian Decision Processes, Proc. of 12th National Conference on Artificial Intelligence, Vol. 1, pp. 700\u2013705(1994)."},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib18","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1007\/BF00115009","article-title":"Learning to Predict by the Methods of Temporal Differences","volume":"3","author":"Sutton","year":"1988","journal-title":"Machine Learning"},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib19","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1007\/BF00992698","article-title":"Technical Note: Q-Learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Machine Learning"},{"issue":"no. 6","key":"10.1016\/B978-1-55860-377-6.50044-X_bib20","doi-asserted-by":"crossref","first-page":"519","DOI":"10.1109\/TAC.1986.1104342","article-title":"Decentralized Learning in Finite Markov Chains","volume":"AC-31","author":"Wheeler","year":"1986","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib21","unstructured":"Whitehead, S.D., and Ballard, D.H. Active Perception and Reinforcement Learning, Proc. of 7th International Conference on Machine Learning, pp. 162\u2013169(1990)."},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib22","unstructured":"Williams, R.J. A Class of Gradient- Estimating Algorithms for Reinforcement learning in Neural Networks, IEEE First International Conference on Neural Networks, volume II, pp. 601\u2013608(1987)."},{"key":"10.1016\/B978-1-55860-377-6.50044-X_bib23","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1007\/BF00992696","article-title":"Simple Statistical Gradient Following Algorithms for Connectionist Reinforcement Learning","volume":"8","author":"Williams","year":"1992","journal-title":"Machine Learning"}],"container-title":["Machine Learning Proceedings 1995"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:B978155860377650044X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:B978155860377650044X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,5,28]],"date-time":"2024-05-28T10:42:48Z","timestamp":1716892968000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/B978155860377650044X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1995]]},"ISBN":["9781558603776"],"references-count":23,"URL":"https:\/\/doi.org\/10.1016\/b978-1-55860-377-6.50044-x","relation":{},"subject":[],"published":{"date-parts":[[1995]]}}}