{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,8]],"date-time":"2026-01-08T22:38:53Z","timestamp":1767911933695,"version":"3.49.0"},"reference-count":4,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2010,9,1]],"date-time":"2010-09-01T00:00:00Z","timestamp":1283299200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Artif Life Robotics"],"published-print":{"date-parts":[[2010,9]]},"DOI":"10.1007\/s10015-010-0822-7","type":"journal-article","created":{"date-parts":[[2010,10,5]],"date-time":"2010-10-05T06:03:22Z","timestamp":1286258602000},"page":"351-354","source":"Crossref","is-referenced-by-count":3,"title":["A study of Q-learning considering negative rewards"],"prefix":"10.1007","volume":"15","author":[{"given":"Takayasu","family":"Fuchida","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kathy Thi","family":"Aung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Atsushi","family":"Sakuragi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2010,10,6]]},"reference":[{"key":"822_CR1","volume-title":"Reinforcement learning: an introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: an introduction. MIT Press, Cambridge"},{"key":"822_CR2","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"L.P. Kaelbling","year":"1996","unstructured":"Kaelbling LP, Littman ML, Moore AW (1996) Reinforcement learning: a survey. J Artif Intel Res 4:237\u2013285","journal-title":"J Artif Intel Res"},{"key":"822_CR3","unstructured":"Yamada S, Ohashi T, Yoshida T, et al (1998) Research about reinforcement learning for autonomic robot in multi-agent environment. IPSJ Kyushu Conference 2B-2, pp 177\u2013186"},{"key":"822_CR4","unstructured":"Okada H, Yamakawa H, Omori T (2000) Reinforcement learning by reward and punishment. Technical Report of IEICE, NC99-100, pp 55\u201362"}],"container-title":["Artificial Life and Robotics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10015-010-0822-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10015-010-0822-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10015-010-0822-7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,5]],"date-time":"2019-06-05T02:58:13Z","timestamp":1559703493000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10015-010-0822-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,9]]},"references-count":4,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2010,9]]}},"alternative-id":["822"],"URL":"https:\/\/doi.org\/10.1007\/s10015-010-0822-7","relation":{},"ISSN":["1433-5298","1614-7456"],"issn-type":[{"value":"1433-5298","type":"print"},{"value":"1614-7456","type":"electronic"}],"subject":[],"published":{"date-parts":[[2010,9]]}}}