{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T07:35:18Z","timestamp":1725521718334},"publisher-location":"Berlin, Heidelberg","reference-count":9,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540891963"},{"type":"electronic","value":"9783540891970"}],"license":[{"start":{"date-parts":[[2008,1,1]],"date-time":"2008-01-01T00:00:00Z","timestamp":1199145600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2008]]},"DOI":"10.1007\/978-3-540-89197-0_18","type":"book-chapter","created":{"date-parts":[[2008,12,3]],"date-time":"2008-12-03T12:23:50Z","timestamp":1228307030000},"page":"164-174","source":"Crossref","is-referenced-by-count":1,"title":["Behavior Learning Based on a Policy Gradient Method: Separation of Environmental Dynamics and State Values in Policies"],"prefix":"10.1007","author":[{"given":"Seiji","family":"Ishihara","sequence":"first","affiliation":[]},{"given":"Harukazu","family":"Igarashi","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"18_CR1","volume-title":"Reinforcement Learning","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning. MIT Press, Cambridge (1998)"},{"key":"18_CR2","first-page":"229","volume":"8","author":"R.J. Williams","year":"1992","unstructured":"Williams, R.J.: Simple Statistical Gradient-Following Algorithms for Connectionist Reinforcement Learning. Machine Learning\u00a08, 229\u2013256 (1992)","journal-title":"Machine Learning"},{"doi-asserted-by":"crossref","unstructured":"Kimura, H., Yamamura, M., Kobayashi, S.: Reinforcement Learning by Stochastic Hill Climbing on Discounted Reward. In: Proceedings of the 12th International Conference on Machine Learning, pp. 295\u2013303 (1995)","key":"18_CR3","DOI":"10.1016\/B978-1-55860-377-6.50044-X"},{"unstructured":"Sutton, R.S., McAllester, D., Singh, S., Mansour, Y.: Policy Gradient Methods for Reinforcement Learning with Function Approximation. In: Advances in Neural Information Processing Systems (Proc. NIPS 1999 Conf.), vol.\u00a012, pp. 1057\u20131063 (2000)","key":"18_CR4"},{"unstructured":"Konda, V.R., Tsitsiklis, J.N.: Actor-Critic Algorithms. In: Advances in Neural Information Processing Systems (Proc. NIPS 1999 Conf.), vol.\u00a012, pp. 1008\u20131014 (2000)","key":"18_CR5"},{"unstructured":"Baird, L., Moore, A.: Gradient Descent for General Reinforcement Learning. In: Advances in Neural Information Processing Systems (Proc. NIPS 1998 Conf.), vol.\u00a011, pp. 968\u2013974 (1999)","key":"18_CR6"},{"issue":"9","key":"18_CR7","first-page":"2271","volume":"J90-D","author":"H. Igarashi","year":"2007","unstructured":"Igarashi, H., Ishihara, S., Kimura, M.: Reinforcement Learning in Non-Markov Decision Processes \u2014Statistical Properties of Characteristic Eligibility. IEICE Transactions on Information and Systems\u00a0J90-D(9), 2271\u20132280 (2007) (in Japanese)","journal-title":"IEICE Transactions on Information and Systems"},{"issue":"10","key":"18_CR8","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1002\/scj.20248","volume":"37","author":"S. Ishihara","year":"2006","unstructured":"Ishihara, S., Igarashi, H.: Applying the Policy Gradient Method to Behavior Learning in Multi-agent Systems: The Pursuit Problem. Systems and Computers in Japan\u00a037(10), 101\u2013109 (2006)","journal-title":"Systems and Computers in Japan"},{"unstructured":"Peshkin, L., Kim, K.E., Meuleau, N., Kaelbling, L.P.: Learning to cooperative via policy search. In: Proc. of 16th Conference on Uncertainty in Artificial Intelligence (UAI 2000), pp. 489\u2013496 (2000)","key":"18_CR9"}],"container-title":["Lecture Notes in Computer Science","PRICAI 2008: Trends in Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-89197-0_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,3,4]],"date-time":"2019-03-04T08:34:26Z","timestamp":1551688466000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-89197-0_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008]]},"ISBN":["9783540891963","9783540891970"],"references-count":9,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-89197-0_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2008]]}}}