{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T20:48:04Z","timestamp":1774385284397,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":16,"publisher":"ACM","license":[{"start":{"date-parts":[[2007,5,14]],"date-time":"2007-05-14T00:00:00Z","timestamp":1179100800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000145","name":"Division of Information and Intelligent Systems","doi-asserted-by":"publisher","award":["IIS-0237699"],"award-info":[{"award-number":["IIS-0237699"]}],"id":[{"id":"10.13039\/100000145","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000185","name":"Defense Advanced Research Projects Agency","doi-asserted-by":"publisher","award":["HR0011-04-1-0035"],"award-info":[{"award-number":["HR0011-04-1-0035"]}],"id":[{"id":"10.13039\/100000185","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["EIA-0303609"],"award-info":[{"award-number":["EIA-0303609"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2007,5,14]]},"DOI":"10.1145\/1329125.1329241","type":"proceedings-article","created":{"date-parts":[[2008,1,18]],"date-time":"2008-01-18T15:04:38Z","timestamp":1200668678000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":32,"title":["Batch reinforcement learning in a complex domain"],"prefix":"10.1145","author":[{"given":"Shivaram","family":"Kalyanakrishnan","sequence":"first","affiliation":[{"name":"The University of Texas at Austin"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peter","family":"Stone","sequence":"additional","affiliation":[{"name":"The University of Texas at Austin"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2007,5,14]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","unstructured":"J. S. Albus. Brains Behavior and Robotics. BYTE Books Peterborough 1981.","DOI":"10.5555\/542806"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.5555\/1046920.1088690"},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of the RoboCup International Symposium 2006","author":"Kalyanakrishnan S.","year":"2006","unstructured":"S. Kalyanakrishnan, Y. Liu, and P. Stone. Half field offense in RoboCup soccer: A multiagent reinforcement learning case study. Proceedings of the RoboCup International Symposium 2006, June 2006."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.5555\/945365.964290"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992699"},{"key":"e_1_3_2_1_6_1","volume-title":"Users manual: RoboCup soccer server --- for soccer server version 7.07 and later","author":"Chen M.","year":"2002","unstructured":"M. Chen, E. Foroughi, F. Heintz, Z. Huang, S. Kapetanakis, K. Kostiadis, J. Kummeneje, I. Noda, O. Obst, P. Riley, T. Steffens, Y. Wang, and X. Yin. Users manual: RoboCup soccer server --- for soccer server version 7.07 and later. The RoboCup Federation, August 2002."},{"key":"e_1_3_2_1_7_1","volume-title":"Advances in Neural Information Processing Systems 16","author":"Ng A. Y.","year":"2004","unstructured":"A. Y. Ng, H. J. Kim, M. I. Jordan, and S. Sastry. Autonomous helicopter flight via reinforcement learning. In S. Thrun, L. Saul, and B. Sch\u00f6lkopf, editors, Advances in Neural Information Processing Systems 16. MIT Press, Cambridge, MA, 2004."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/11564096_32"},{"key":"e_1_3_2_1_9_1","unstructured":"G. A. Rummery and M. Niranjan. On-line Q-learning using connectionist systems. Technical Report CUED\/F-INFENG\/TR 166 Cambridge University Engineering Department 1994."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.5555\/646586.696867"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1177\/105971230501300301"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.5555\/530951"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/1143997.1144202"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/1082473.1082482"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"}],"event":{"name":"AAMAS07: International Conference on Autonomous Agents and Mulitagent Systems","location":"Honolulu Hawaii","acronym":"AAMAS07","sponsor":["IFAAMAS"]},"container-title":["Proceedings of the 6th international joint conference on Autonomous agents and multiagent systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1329125.1329241","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1329125.1329241","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T17:21:16Z","timestamp":1774372876000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1329125.1329241"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2007,5,14]]},"references-count":16,"alternative-id":["10.1145\/1329125.1329241","10.1145\/1329125"],"URL":"https:\/\/doi.org\/10.1145\/1329125.1329241","relation":{},"subject":[],"published":{"date-parts":[[2007,5,14]]},"assertion":[{"value":"2007-05-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}