{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T10:54:47Z","timestamp":1725533687618},"publisher-location":"Berlin, Heidelberg","reference-count":11,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642023187"},{"type":"electronic","value":"9783642023194"}],"license":[{"start":{"date-parts":[[2009,1,1]],"date-time":"2009-01-01T00:00:00Z","timestamp":1230768000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2009]]},"DOI":"10.1007\/978-3-642-02319-4_40","type":"book-chapter","created":{"date-parts":[[2009,6,20]],"date-time":"2009-06-20T02:34:09Z","timestamp":1245465249000},"page":"336-343","source":"Crossref","is-referenced-by-count":1,"title":["Tentative Exploration on Reinforcement Learning Algorithms for Stochastic Rewards"],"prefix":"10.1007","author":[{"given":"Luis","family":"Pe\u00f1a","sequence":"first","affiliation":[]},{"given":"Antonio","family":"LaTorre","sequence":"additional","affiliation":[]},{"given":"Jos\u00e9-Mar\u00eda","family":"Pe\u00f1a","sequence":"additional","affiliation":[]},{"given":"Sascha","family":"Ossowski","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"40_CR1","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1016\/S0004-3702(02)00121-2","volume":"136","author":"M. Bowling","year":"2002","unstructured":"Bowling, M., Veloso, M.: Multiagent learning using a variable learning rate. Artificial Intelligence\u00a0136, 215\u2013250 (2002)","journal-title":"Artificial Intelligence"},{"key":"40_CR2","unstructured":"Conitzer, V., Sandholm, T.: Bl-wolf: A framework for loss-bounded learnability in zero-sum games. In: International Conference on Machine Learning (ICML), pp. 91\u201398 (2003)"},{"key":"40_CR3","first-page":"2004","volume-title":"Advances in Neural Information Processing Systems","author":"G. Tesauro","year":"2004","unstructured":"Tesauro, G.: Extending q-learning to general adaptive multi-agent systems. In: Advances in Neural Information Processing Systems, vol.\u00a016, p. 2004. MIT Press, Cambridge (2004)"},{"key":"40_CR4","first-page":"27","volume-title":"Proceedings of the Eighteenth International Conference on Machine Learning","author":"M. Bowling","year":"2001","unstructured":"Bowling, M., Veloso, M.: Convergence of gradient dynamics with a variable learning rate. In: Proceedings of the Eighteenth International Conference on Machine Learning, pp. 27\u201334. Morgan Kaufmann, San Francisco (2001)"},{"key":"40_CR5","first-page":"541","volume-title":"Proceedings of the Sixteenth Conference on Uncertainty in Artificial Intelligence","author":"S. Singh","year":"2000","unstructured":"Singh, S., Kearns, M., Mansour, Y.: Nash convergence of gradient dynamics in general-sum games. In: Proceedings of the Sixteenth Conference on Uncertainty in Artificial Intelligence, pp. 541\u2013548. Morgan Kaufmann, San Francisco (2000)"},{"key":"40_CR6","first-page":"209","volume-title":"Advances in Neural Information Processing Systems","author":"M. Bowling","year":"2005","unstructured":"Bowling, M.: Convergence and no-regret in multiagent learning. In: Advances in Neural Information Processing Systems, vol.\u00a017, pp. 209\u2013216. MIT Press, Cambridge (2005)"},{"key":"40_CR7","unstructured":"Zinkevich, M.: Online convex programming and generalized infinitesimal gradient ascent. In: Proceedings of the Twentieth International Conference on Machine Learning, pp. 928\u2013936 (2003)"},{"key":"40_CR8","doi-asserted-by":"crossref","first-page":"521","DOI":"10.1613\/jair.2628","volume":"33","author":"S. Abdallah","year":"2008","unstructured":"Abdallah, S., Lesser, V.: A Multiagent Reinforcement Learning Algorithm with Non-linear Dynamics. Journal of Artificial Intelligence Research\u00a033, 521\u2013549 (2008)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"40_CR9","unstructured":"Sinha, A., Goldberg, D.: A survey of hybrid genetic and evolutionary algorithms. Technical Report 2003004, Illinois Genetic Algorithms Laboratory, IlliGAL (2003)"},{"key":"40_CR10","volume-title":"Proceedings of the ECML\/PKDD 2007 Workshop on Data Mining in Functional Genomics and Proteomics: Current Trends and Future Directions","author":"A. LaTorre","year":"2007","unstructured":"LaTorre, A., Pe\u00f1a, J., Gonz\u00e1lez, S., Robles, V., Famili, F.: Breast cancer biomarker selection using multiple offspring sampling. In: Proceedings of the ECML\/PKDD 2007 Workshop on Data Mining in Functional Genomics and Proteomics: Current Trends and Future Directions, Warsaw, Poland. Springer, Heidelberg (2007)"},{"key":"40_CR11","unstructured":"Tang, K., Yao, X., Suganthan, P., MacNish, C., Chen, Y., Chen, C., Yang, Z.: Benchmark functions for the cec 2008 special session and competition on large scale global optimization. Technical report, Nature Inspired Computation and Applications Laboratory, USTC, China (2007)"}],"container-title":["Lecture Notes in Computer Science","Hybrid Artificial Intelligence Systems"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-02319-4_40","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,3,8]],"date-time":"2019-03-08T16:46:05Z","timestamp":1552063565000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-02319-4_40"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009]]},"ISBN":["9783642023187","9783642023194"],"references-count":11,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-02319-4_40","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2009]]}}}