{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T23:44:41Z","timestamp":1725493481860},"publisher-location":"Berlin, Heidelberg","reference-count":12,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540423270"},{"type":"electronic","value":"9783540445685"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2001]]},"DOI":"10.1007\/3-540-44568-4_9","type":"book-chapter","created":{"date-parts":[[2007,10,28]],"date-time":"2007-10-28T05:32:47Z","timestamp":1193549567000},"page":"137-149","source":"Crossref","is-referenced-by-count":5,"title":["Reinforcement Learning for Cooperating and Communicating Reactive Agents in Electrical Power Grids"],"prefix":"10.1007","author":[{"given":"Martin","family":"Riedmiller","sequence":"first","affiliation":[]},{"given":"Andrew","family":"Moore","sequence":"additional","affiliation":[]},{"given":"Jeff","family":"Schneider","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2001,7,4]]},"reference":[{"key":"9_CR1","unstructured":"L. C. Baird. Residual algorithms: Reinforcement learning with function approximation. In Machine Learning: Proceedings of the 12th International Conference, 1995."},{"key":"9_CR2","unstructured":"A. G. Barto and R. H. Crites. Improving elevator performance using reinforcement learning. In M. E. Hasselmo D. S. Touretzky, M. C. Mozer, editors, Advances in Neural Information Processing Systems 8. MIT Press, 1996."},{"key":"9_CR3","unstructured":"C. Boutilier. Sequential optimality and coordination in multi agent systems. Proceedings of 16.th Joint Conference on Artificial Intelligence, IJCAI, pages 178\u2013185, 1998."},{"key":"9_CR4","doi-asserted-by":"crossref","unstructured":"W. Brauer and G. Weiss. Multi-machine scheduling \u2014 a multi-agent learning approach. In Proceedings of the 3rd International Conference on Multi-Agent Systems, pages 42\u201348, 1998.","DOI":"10.1109\/ICMAS.1998.699030"},{"key":"9_CR5","first-page":"371","volume-title":"Proceedings of International Conference on Machine Learning, ICML\u201999","author":"W. Wong","year":"1999","unstructured":"W. Wong A. Moore J. Schneider and M. Riedmiller. Distributed value functions. In Proceedings of International Conference on Machine Learning, ICML\u201999, pages 371\u2013378, Bled, Slovenia, 1999."},{"key":"9_CR6","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"L. P. Kaelbling","year":"1996","unstructured":"L. P. Kaelbling, M. L. Littman, and A. W. Moore. Reinforcement learning: A survey. Journal of Artificial Intelligence Research, 4:237\u2013285, 1996.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"9_CR7","unstructured":"M. Lauer and M. Riedmiller. An algorithm for distributed reinforcement learning in cooperative multi-agent systems. In Proceedings of International Conference on Machine Learning, ICML\u2019 00, pages 535\u2013542, Stanford, CA, 2000."},{"key":"9_CR8","unstructured":"S. Mahadevan and G. Theocharous. Optimization production manufacturing using reinforcement learning. In Proceedings of the Eleventh International FLAIRS Conference, pages 372\u2013377. AAAI Press, 1998."},{"key":"9_CR9","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1007\/s005210050038","volume":"8","author":"M. Riedmiller","year":"2000","unstructured":"M. Riedmiller. Concepts and facilities of a neural reinforcement learning control architecture for technical process control. Journal of Neural Computing and Application, 8:323\u2013338, 2000.","journal-title":"Journal of Neural Computing and Application"},{"key":"9_CR10","unstructured":"S. Riedmiller and M. Riedmiller. A neural reinforcement learning approach to learn local dispatching policies in production scheduling. In Proceedings of International Joint Conference on Artificial Intelligence, ICJAI\u201999, Stockholm, 1999."},{"key":"9_CR11","unstructured":"C. J. Watkins. Learning from Delayed Rewards. PhD thesis, Cambridge University, 1989."},{"key":"9_CR12","unstructured":"M. Woolridge. Intelligent agents. In G. Weiss, editor, Multi Agent Systems. MIT Press, 1999."}],"container-title":["Lecture Notes in Computer Science","Balancing Reactivity and Social Deliberation in Multi-Agent Systems"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/3-540-44568-4_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,2,24]],"date-time":"2019-02-24T17:47:19Z","timestamp":1551030439000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/3-540-44568-4_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2001]]},"ISBN":["9783540423270","9783540445685"],"references-count":12,"URL":"https:\/\/doi.org\/10.1007\/3-540-44568-4_9","relation":{},"ISSN":["0302-9743"],"issn-type":[{"type":"print","value":"0302-9743"}],"subject":[],"published":{"date-parts":[[2001]]}}}