{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,24]],"date-time":"2025-03-24T04:03:10Z","timestamp":1742788990599,"version":"3.40.2"},"publisher-location":"Berlin, Heidelberg","reference-count":15,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642284861"},{"type":"electronic","value":"9783642284878"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-28487-8_28","type":"book-chapter","created":{"date-parts":[[2012,3,14]],"date-time":"2012-03-14T07:34:34Z","timestamp":1331710474000},"page":"270-280","source":"Crossref","is-referenced-by-count":1,"title":["Evaluation of the Improved Penalty Avoiding Rational Policy Making Algorithm in Real World Environment"],"prefix":"10.1007","author":[{"given":"Kazuteru","family":"Miyazaki","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Masaki","family":"Itou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hiroaki","family":"Kobayashi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"28_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Ng, A.Y.: Exploration and apprenticeship learning in reinforcement learning. In: Proc. of the 22nd International Conference on Machine Learning, pp. 1\u20138 (2005)","DOI":"10.1145\/1102351.1102352"},{"issue":"6","key":"28_CR2","doi-asserted-by":"publisher","first-page":"537","DOI":"10.1527\/tjsai.21.537","volume":"21","author":"S. Arai","year":"2006","unstructured":"Arai, S., Tanaka, N.: Experimental Analysis of Reward Design for Continuing Task in Multiagent Domains \u2013 RoboCup Soccer Keepaway. Transactions of the Japanese Society for Artificial Intelligence\u00a021(6), 537\u2013546 (2006) (in Japanese)","journal-title":"Transactions of the Japanese Society for Artificial Intelligence"},{"key":"28_CR3","unstructured":"Kimura, H., Kobayashi, S.: An analysis of actor\/critic algorithm using eligibility traces: reinforcement learning with imperfect value function. In: Proc. of the 15th Int. Conf. on Machine Learning, pp. 278\u2013286 (1998)"},{"issue":"2","key":"28_CR4","first-page":"173","volume":"2","author":"T. Hong","year":"2011","unstructured":"Hong, T., Wu, C.: An Improved Weighted Clustering Algorithm for Determination of Application Nodes in Heterogeneous Sensor Networks. J. of Information Hiding and Multimedia Signal Processing.\u00a02(2), 173\u2013184 (2011)","journal-title":"J. of Information Hiding and Multimedia Signal Processing."},{"key":"28_CR5","doi-asserted-by":"crossref","unstructured":"Kuroda, S., Miyazaki, K., Kobayashi, H.: Introduction of Fixed Mode States into Online Profit Sharing and Its Application to Waist Trajectory Generation of Biped Robot. In: European Workshop on Reinforcement Learning 9 (2011)","DOI":"10.1007\/978-3-642-29946-9_29"},{"issue":"4","key":"28_CR6","first-page":"253","volume":"1","author":"T.C. Lin","year":"2007","unstructured":"Lin, T.C., Huang, H.C., Liao, B.Y., Pan, J.S.: An Optimized Approach on Applying Genetic Algorithm to Adaptive Cluster Validity Index. International Journal of Computer Sciences and Engineering Systems\u00a01(4), 253\u2013257 (2007)","journal-title":"International Journal of Computer Sciences and Engineering Systems"},{"key":"28_CR7","unstructured":"Miyazaki, K., Yamamura, M., Kobayashi, S.: On the Rationality of Profit Sharing in Reinforcement Learning. In: Proc. of the 3rd Int. Conf. on Fuzzy Logic, Neural Nets and Soft Computing, pp. 285\u2013288 (1994)"},{"key":"28_CR8","unstructured":"Miyazaki, K., Kobayashi, S.: Learning Deterministic Policies in Partially Observable Markov Decision Processes. In: Proc. of 5th Int. Conf. on Intelligent Autonomous System, pp. 250\u2013257 (1998)"},{"key":"28_CR9","doi-asserted-by":"crossref","unstructured":"Miyazaki, K., Kobayashi, S.: Reinforcement Learning for Penalty Avoiding Policy Making. In: Proc. of the 2000 IEEE Int. Conf. on Systems, Man and Cybernetics, pp. 206\u2013211 (2000)","DOI":"10.1109\/ICSMC.2000.884990"},{"issue":"6","key":"28_CR10","doi-asserted-by":"publisher","first-page":"668","DOI":"10.20965\/jaciii.2007.p0668","volume":"11","author":"K. Miyazaki","year":"2007","unstructured":"Miyazaki, K., Kobayashi, S.: A Reinforcement Learning System for Penalty Avoiding in Continuous State Spaces. J. of Advanced Computational Intelligence and Intelligent Informatics\u00a011(6), 668\u2013676 (2007)","journal-title":"J. of Advanced Computational Intelligence and Intelligent Informatics"},{"issue":"6","key":"28_CR11","doi-asserted-by":"publisher","first-page":"624","DOI":"10.20965\/jaciii.2009.p0624","volume":"13","author":"K. Miyazaki","year":"2009","unstructured":"Miyazaki, K., Kobayashi, S.: Exploitation-Oriented Learning PS-r#. J. of Advanced Computational Intelligence and Intelligent Informatics\u00a013(6), 624\u2013630 (2009)","journal-title":"J. of Advanced Computational Intelligence and Intelligent Informatics"},{"key":"28_CR12","unstructured":"Ng, A.Y.,, Russell, S.J.: Algorithms for Inverse Reinforcement Learning. In: Proc. of the 17th Int. Conf. on Machine Learning, pp. 663\u2013670 (2000)"},{"issue":"3","key":"28_CR13","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1177\/105971230501300301","volume":"13","author":"P. Stone","year":"2005","unstructured":"Stone, P., Sutton, R.S., Kuhlamann, G.: Reinforcement Learning toward RoboCup Soccer Keepaway. Adaptive Behavior\u00a013(3), 0165\u20130188 (2005)","journal-title":"Adaptive Behavior"},{"key":"28_CR14","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. A Bradford Book. MIT Press (1998)","DOI":"10.1109\/TNN.1998.712192"},{"issue":"6","key":"28_CR15","doi-asserted-by":"publisher","first-page":"675","DOI":"10.20965\/jaciii.2009.p0675","volume":"13","author":"T. Watanabe","year":"2009","unstructured":"Watanabe, T., Miyazaki, K., Kobayashi, H.: A New Improved Penalty Avoiding Rational Policy Making Algorithm for Keepaway with Continuous State Spaces. J. of Advanced Computational Intelligence and Intelligent Informatics.\u00a013(6), 675\u2013682 (2009)","journal-title":"J. of Advanced Computational Intelligence and Intelligent Informatics."}],"container-title":["Lecture Notes in Computer Science","Intelligent Information and Database Systems"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-28487-8_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,23]],"date-time":"2025-03-23T03:24:31Z","timestamp":1742700271000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-28487-8_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642284861","9783642284878"],"references-count":15,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-28487-8_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}