{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,5]],"date-time":"2025-10-05T16:50:34Z","timestamp":1759683034506},"reference-count":23,"publisher":"Springer Science and Business Media LLC","issue":"1-2","license":[{"start":{"date-parts":[[2005,5,1]],"date-time":"2005-05-01T00:00:00Z","timestamp":1114905600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2005,5]]},"DOI":"10.1007\/s10994-005-0461-8","type":"journal-article","created":{"date-parts":[[2005,6,9]],"date-time":"2005-06-09T09:02:36Z","timestamp":1118307756000},"page":"31-54","source":"Crossref","is-referenced-by-count":19,"title":["A Reinforcement Learning Scheme for a Partially-Observable Multi-Agent Game"],"prefix":"10.1007","volume":"59","author":[{"given":"Shin","family":"Ishii","sequence":"first","affiliation":[]},{"given":"Hajime","family":"Fujita","sequence":"additional","affiliation":[]},{"given":"Masaoki","family":"Mitsutake","sequence":"additional","affiliation":[]},{"given":"Tatsuya","family":"Yamazaki","sequence":"additional","affiliation":[]},{"given":"Jun","family":"Matsuda","sequence":"additional","affiliation":[]},{"given":"Yoichiro","family":"Matsuno","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"461_CR1","doi-asserted-by":"crossref","first-page":"834","DOI":"10.1109\/TSMC.1983.6313077","volume":"13","author":"A. G. Barto","year":"1983","unstructured":"Barto, A. G., Sutton, R. S., & Anderson, C. W. (1983). Neuronlike adaptive elements that can solve difficult learning control problems. IEEE Trans. Syst., Man. & Cybern., 13, 834\u2013846.","journal-title":"IEEE Trans. Syst., Man. & Cybern."},{"key":"461_CR2","doi-asserted-by":"crossref","first-page":"131","DOI":"10.1111\/j.1467-8640.1996.tb00256.x","volume":"12","author":"J. R. 
S. Blair","year":"1995","unstructured":"Blair, J. R. S., Mutchler, D., & Lent, M. (1995). Perfect recall and pruning in games with imperfect information. Computational Intelligence, 12, 131\u2013154.","journal-title":"Computational Intelligence"},{"key":"461_CR3","unstructured":"Crites, R. H. (1996). Large-scale dynamic optimization using teams of reinforcement learning agents. Ph.D. thesis, University of Massachusetts, Amherst."},{"key":"461_CR4","doi-asserted-by":"crossref","first-page":"235","DOI":"10.1023\/A:1007518724497","volume":"33","author":"R. H. Crites","year":"1996","unstructured":"Crites, R. H., & Barto, A. G. (1996). Elevator group control using multiple reinforcement learning agents. Machine Learning, 33, 235\u2013262.","journal-title":"Machine Learning"},{"key":"461_CR5","doi-asserted-by":"crossref","first-page":"303","DOI":"10.1613\/jair.820","volume":"14","author":"M. Ginsberg","year":"2001","unstructured":"Ginsberg, M. (2001). Gib: Imperfect information in a computationally challenging game. Journal of Artificial Intelligence Research, 14, 303\u2013358.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"461_CR6","unstructured":"Hu, J., & Wellman, M. P. (1998). Multiagent reinforcement learning: Theoretical framework and an algorithm. In Proceedings of the Fifteenth International Conference on Machine Learning (pp. 242\u2013250)."},{"key":"461_CR7","doi-asserted-by":"crossref","first-page":"665","DOI":"10.1016\/S0893-6080(02)00056-4","volume":"15","author":"S. Ishii","year":"2002","unstructured":"Ishii, S., Yoshida, W., & Yoshimoto, J. (2002). Control of exploitation-exploration meta-parameter in reinforcement learning. Neural Networks, 15, 665\u2013687.","journal-title":"Neural Networks"},{"key":"461_CR8","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1016\/S0004-3702(98)00023-X","volume":"101","author":"L. P. Kaelbling","year":"1998","unstructured":"Kaelbling, L. P., Littman, M. L., & Cassandra, A. (1998). 
Planning and acting in partially observable stochastic domains. Artificial Intelligence, 101, 99\u2013134.","journal-title":"Artificial Intelligence"},{"key":"461_CR9","unstructured":"Lin, L.-J., & Mitchell, T. (1992). Memory approaches to reinforcement learning in non-markovian domains. Tech. rep., CMU-CS-92-138."},{"key":"461_CR10","doi-asserted-by":"crossref","unstructured":"Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the 11th International Conference on Machine Learning (pp. 157\u2013163).","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"461_CR11","doi-asserted-by":"crossref","unstructured":"Matsuno, Y., Yamazaki, T., Matsuda, J., & Ishii, S. (2001). A multi-agent reinforcement learning method for a partially-observable competitive game. In Proceedings of the Fifth International Conference on Autonomous Agents (pp. 39\u201340).","DOI":"10.1145\/375735.375856"},{"key":"461_CR12","unstructured":"McCallum, A. (1995). Reinforcement learning with selective perception and hidden state. Ph.D. thesis, University of Rochester."},{"key":"461_CR13","doi-asserted-by":"crossref","first-page":"281","DOI":"10.1162\/neco.1989.1.2.281","volume":"1","author":"J. Moody","year":"1989","unstructured":"Moody, J., & Darken, C. J. (1989). Fast learning in networks of locally-tuned processing units. Neural Computation, 1, 281\u2013294.","journal-title":"Neural Computation"},{"key":"461_CR14","first-page":"103","volume":"13","author":"A. Moore","year":"1993","unstructured":"Moore, A., & Atkeson, C. (1993). Prioritized sweeping: Reinforcement learning with less data and less real time. Machine Learning, 13, 103\u2013130.","journal-title":"Machine Learning"},{"key":"461_CR15","doi-asserted-by":"crossref","unstructured":"Nagayuki, Y., Ishii, S., & Doya, K. (2000). Multi-agent reinforcement learning: An approach based on the other agent\u2019s internal model. 
In Proceedings of the Fourth International Conference on MultiAgent Systems (pp. 215\u2013221).","DOI":"10.1109\/ICMAS.2000.858456"},{"key":"461_CR16","doi-asserted-by":"crossref","first-page":"263","DOI":"10.1023\/A:1007570708568","volume":"33","author":"R. P. Salustowicz","year":"1998","unstructured":"Salustowicz, R. P., Wiering, M. A., & Schmidhuber, J. (1998). Learning team strategies: Soccer case studies. Machine Learning, 33, 263\u2013282.","journal-title":"Machine Learning"},{"key":"461_CR17","doi-asserted-by":"crossref","first-page":"147","DOI":"10.1016\/0303-2647(95)01551-5","volume":"37","author":"T. W. Sandholm","year":"1995","unstructured":"Sandholm, T. W., & Crites, R. H. (1995). Multiagent reinforcement learning in the iterated prisoner\u2019s dilemma. Biosystems, 37, 147\u2013166.","journal-title":"Biosystems"},{"key":"461_CR18","doi-asserted-by":"crossref","first-page":"407","DOI":"10.1162\/089976600300015853","volume":"12","author":"M. Sato","year":"2000","unstructured":"Sato, M., & Ishii, S. (2000). On-line em algorithm for the normalized gaussian network. Neural Computation, 12, 407\u2013432.","journal-title":"Neural Computation"},{"key":"461_CR19","unstructured":"Sen, S., Sekaran, M., & Hale, J. (1994). Learning to coordinate without sharing information. In Proceedings of the Twelfth National Conference on Artificial Intelligence (pp. 426\u2013431)."},{"key":"461_CR20","doi-asserted-by":"crossref","unstructured":"Sutton, R., & Barto, A. (Eds.). (1998). Reinforcement learning: An introduction. MIT Press.","DOI":"10.1109\/TNN.1998.712192"},{"key":"461_CR21","doi-asserted-by":"crossref","unstructured":"Tan, M. (1993). Multi-agent reinforcement learning: Independent vs. cooperative agents. In Proceedings of the Tenth International Conference on Machine Learning (pp. 
330\u2013337).","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"461_CR22","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1162\/neco.1994.6.2.215","volume":"6","author":"G. J. Tesauro","year":"1994","unstructured":"Tesauro, G. J. (1994). Td-gammon, a self-teaching backgammon program, achieves master-level play. Neural Computation, 6, 215\u2013219.","journal-title":"Neural Computation"},{"key":"461_CR23","doi-asserted-by":"crossref","first-page":"271","DOI":"10.1016\/0004-3702(94)00012-P","volume":"73","author":"S. Whitehead","year":"1995","unstructured":"Whitehead, S., & Lin, L.-J. (1995). Reinforcement learning of non-markov decision processes. Artificial Intelligence, 73, 271\u2013306.","journal-title":"Artificial Intelligence"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-005-0461-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-005-0461-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-005-0461-8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,7]],"date-time":"2020-04-07T15:47:27Z","timestamp":1586274447000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-005-0461-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2005,5]]},"references-count":23,"journal-issue":{"issue":"1-2","published-print":{"date-parts":[[2005,5]]}},"alternative-id":["461"],"URL":"https:\/\/doi.org\/10.1007\/s10994-005-0461-8","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2005
,5]]}}}