{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,25]],"date-time":"2025-11-25T06:47:35Z","timestamp":1764053255494,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":11,"publisher":"ACM","license":[{"start":{"date-parts":[[2011,7,12]],"date-time":"2011-07-12T00:00:00Z","timestamp":1310428800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2011,7,12]]},"DOI":"10.1145\/2001858.2001957","type":"proceedings-article","created":{"date-parts":[[2011,7,15]],"date-time":"2011-07-15T12:25:09Z","timestamp":1310732709000},"page":"177-178","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Evolution of reward functions for reinforcement learning"],"prefix":"10.1145","author":[{"given":"Scott","family":"Niekum","sequence":"first","affiliation":[{"name":"University of Massachusetts, Amherst, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lee","family":"Spector","sequence":"additional","affiliation":[{"name":"Hampshire College, Amherst, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andrew","family":"Barto","sequence":"additional","affiliation":[{"name":"University of Massachusetts, Amherst, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2011,7,12]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Adaptive individuals in evolving populations: models and algorithms","author":"Belew R. K.","year":"1996","unstructured":"R. K. Belew and M. Mitchell , editors . Adaptive individuals in evolving populations: models and algorithms . Addison-Wesley Longman Publishing Co., Inc. , Boston, MA, USA , 1996 . R. K. Belew and M. Mitchell, editors. Adaptive individuals in evolving populations: models and algorithms. Addison-Wesley Longman Publishing Co., Inc., Boston, MA, USA, 1996."},{"key":"e_1_3_2_1_2_1","first-page":"278","volume-title":"Proc. 16th Intl. Conf. on Machine Learning","author":"Ng A.","year":"1999","unstructured":"A. Ng , D. Harada , and S. Russell . Policy invariance under reward transformations: theory and application to reward shaping . In Proc. 16th Intl. Conf. on Machine Learning , pages 278 -- 287 , 1999 . A. Ng, D. Harada, and S. Russell. Policy invariance under reward transformations: theory and application to reward shaping. In Proc. 16th Intl. Conf. on Machine Learning, pages 278--287, 1999."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAMD.2010.2051436"},{"key":"e_1_3_2_1_4_1","first-page":"113","volume-title":"Genetic Programming Theory and Practice IV","author":"Schmidt M. D.","year":"2006","unstructured":"M. D. Schmidt and H. Lipson . Co-evolving fitness predictors for accelerating and reducing evaluations . In R. L. Riolo, T. Soule, and B. Worzel, editors, Genetic Programming Theory and Practice IV , pages 113 -- 130 . Springer , 2006 . M. D. Schmidt and H. Lipson. Co-evolving fitness predictors for accelerating and reducing evaluations. In R. L. Riolo, T. Soule, and B. Worzel, editors, Genetic Programming Theory and Practice IV, pages 113--130. Springer, 2006."},{"key":"e_1_3_2_1_5_1","first-page":"2601","volume-title":"Proc. 31st Annual Conf. of the Cognitive Science Society","author":"Singh S.","year":"2009","unstructured":"S. Singh , R. Lewis , and A. Barto . Where do rewards come from ? In Proc. 31st Annual Conf. of the Cognitive Science Society , pages 2601 -- 2606 , 2009 . S. Singh, R. Lewis, and A. Barto. Where do rewards come from? In Proc. 31st Annual Conf. of the Cognitive Science Society, pages 2601--2606, 2009."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAMD.2010.2051031"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1068009.1068292"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1014538503543"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.5555\/551283"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10710-010-9106-1"},{"key":"e_1_3_2_1_11_1","volume-title":"Learning from delayed rewards","author":"Watkins C.","year":"1989","unstructured":"C. Watkins . Learning from delayed rewards . PhD Thesis University of Cambridge , England , 1989 . C. Watkins. Learning from delayed rewards. PhD Thesis University of Cambridge, England, 1989."}],"event":{"name":"GECCO '11: Genetic and Evolutionary Computation Conference","sponsor":["SIGEVO ACM Special Interest Group on Genetic and Evolutionary Computation"],"location":"Dublin Ireland","acronym":"GECCO '11"},"container-title":["Proceedings of the 13th annual conference companion on Genetic and evolutionary computation"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2001858.2001957","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2001858.2001957","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T11:06:26Z","timestamp":1750244786000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2001858.2001957"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,7,12]]},"references-count":11,"alternative-id":["10.1145\/2001858.2001957","10.1145\/2001858"],"URL":"https:\/\/doi.org\/10.1145\/2001858.2001957","relation":{},"subject":[],"published":{"date-parts":[[2011,7,12]]},"assertion":[{"value":"2011-07-12","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}