{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T18:01:03Z","timestamp":1743098463032,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":44,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642398742"},{"type":"electronic","value":"9783642398759"}],"license":[{"start":{"date-parts":[[2013,1,1]],"date-time":"2013-01-01T00:00:00Z","timestamp":1356998400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2013,1,1]],"date-time":"2013-01-01T00:00:00Z","timestamp":1356998400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013]]},"DOI":"10.1007\/978-3-642-39875-9_6","type":"book-chapter","created":{"date-parts":[[2013,11,13]],"date-time":"2013-11-13T14:29:24Z","timestamp":1384352964000},"page":"99-125","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Learning and Coordinating Repertoires of Behaviors with Common Reward: Credit Assignment and Module Activation"],"prefix":"10.1007","author":[{"given":"Constantin A.","family":"Rothkopf","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dana H.","family":"Ballard","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2013,9,28]]},"reference":[{"issue":"1","key":"6_CR1","doi-asserted-by":"publisher","first-page":"68","DOI":"10.1162\/jocn.1995.7.1.66","volume":"7","author":"D. H. Ballard","year":"1995","unstructured":"Ballard, D. H., Hayhoe, M. M., Pelz, J. (1995). Memory representations in natural tasks. Journal of Cognitive Neuroscience, 7(1), 68\u201382.","journal-title":"Journal of Cognitive Neuroscience"},{"key":"6_CR2","doi-asserted-by":"crossref","first-page":"723","DOI":"10.1017\/S0140525X97001611","volume":"20","author":"D. H. Ballard","year":"1997","unstructured":"Ballard, D. H., Hayhoe, M. M., Pook, P., Rao, R. P. N. R. (1997). Deictic codes for the embodiment of cognition. Behavioral and Brain Sciences, 20, 723\u2013767.","journal-title":"Behavioral and Brain Sciences"},{"issue":"3","key":"6_CR3","doi-asserted-by":"publisher","first-page":"628","DOI":"10.1037\/0033-295X.113.3.628","volume":"113","author":"H. Barrett","year":"2006","unstructured":"Barrett, H., & Kurzban, R. (2006). Modularity in cognition: framing the debate. Psychological Review; Psychological Review, 113(3), 628.","journal-title":"Psychological Review; Psychological Review"},{"key":"6_CR4","doi-asserted-by":"crossref","unstructured":"Brooks, R. (1986). A robust layered control system for a mobile robot. IEEE Journal of Robotics and Automation, 2(1).","DOI":"10.1109\/JRA.1986.1087032"},{"key":"6_CR5","volume-title":"Advances in neural information processing systems 16","author":"Y.-H. Chang","year":"2004","unstructured":"Chang, Y.-H., Ho, T., Kaelbling, L. P. (2004). All learning is local: multi-agent learning in global reward games. In S. Thrun, L. Saul, B. Sch\u00f6lkopf (Eds.), Advances in neural information processing systems 16. Cambridge: MIT."},{"issue":"2","key":"6_CR6","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1016\/j.conb.2006.03.006","volume":"16","author":"N. Daw","year":"2006","unstructured":"Daw, N., & Doya, K. (2006). The computational neurobiology of learning and reward. Current opinion in Neurobiology, 16(2), 199\u2013204.","journal-title":"Current opinion in Neurobiology"},{"issue":"12","key":"6_CR7","doi-asserted-by":"publisher","first-page":"1704","DOI":"10.1038\/nn1560","volume":"8","author":"N. D. Daw","year":"2005","unstructured":"Daw, N. D., Niv, Y., Dayan, P. (2005). Uncertainty-based competition between prefrontal and dorsolateral striatal systems for behavioral control. Nature Neuroscience, 8(12), 1704\u20131711.","journal-title":"Nature Neuroscience"},{"key":"6_CR8","first-page":"271","volume-title":"Advances in neural information processing systems 5","author":"P. Dayan","year":"1992","unstructured":"Dayan, P., & Hinton, G. E. (1992). Feudal reinforcement learning. In Advances in neural information processing systems 5 (pp. 271\u2013271). Los Altos: Morgan Kaufmann Publishers, Inc."},{"issue":"6","key":"6_CR9","doi-asserted-by":"publisher","first-page":"1347","DOI":"10.1162\/089976602753712972","volume":"14","author":"K. Doya","year":"2002","unstructured":"Doya, K., Samejima, K., Katagiri, K.-I., Kawato, M. (2002). Multiple model-based reinforcement learning. Neural Computation, 14(6), 1347\u20131369.","journal-title":"Neural Computation"},{"key":"6_CR10","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/4737.001.0001","volume-title":"Modularity of Mind","author":"J. A. Fodor","year":"1983","unstructured":"Fodor, J. A. (1983). Modularity of Mind. Cambridge: MIT."},{"key":"6_CR11","first-page":"197","volume-title":"Proceedings of the fifteenth international conference on machine learning","author":"Z. G\u00e1bor","year":"1998","unstructured":"G\u00e1bor, Z., Kalm\u00e1r, Z., Szepesv\u00e1ri, C. (1998). Multi-criteria reinforcement learning. In Proceedings of the fifteenth international conference on machine learning (pp. 197\u2013205). Los Altos: Morgan Kaufmann Publishers Inc."},{"issue":"43","key":"6_CR12","doi-asserted-by":"publisher","first-page":"13524","DOI":"10.1523\/JNEUROSCI.2469-09.2009","volume":"29","author":"S. Gershman","year":"2009","unstructured":"Gershman, S., Pesaran, B., Daw, N. (2009). Human reinforcement learning subdivides structured action spaces by learning effector-specific values. The Journal of Neuroscience, 29(43), 13524\u201313531.","journal-title":"The Journal of Neuroscience"},{"key":"6_CR13","doi-asserted-by":"crossref","first-page":"399","DOI":"10.1613\/jair.1000","volume":"19","author":"C. Guestrin","year":"2003","unstructured":"Guestrin, C., Koller, D., Parr, R., Venkataraman, S. (2003). Efficient solution algorithms for factored MDPs. Journal of Artificial Intelligence Research, 19, 399\u2013468.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"6_CR14","doi-asserted-by":"crossref","first-page":"135","DOI":"10.7551\/mitpress\/3118.003.0018","volume-title":"From animals to animats 4: proceedings of the fourth international conference on simulation of adaptive behavior","author":"M. Humphrys","year":"1996","unstructured":"Humphrys, M. (1996). Action selection methods using reinforcement learning. In P. Maes, M. Mataric, J.-A. Meyer, J. Pollack, S. W. Wilson (Eds.), From animals to animats 4: proceedings of the fourth international conference on simulation of adaptive behavior (pp. 135\u2013144). Cambridge: MIT, Bradford Books."},{"issue":"1","key":"6_CR15","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1162\/neco.1991.3.1.79","volume":"3","author":"R. Jacobs","year":"1991","unstructured":"Jacobs, R., Jordan, M., Nowlan, S., Hinton, G. (1991). Adaptive mixtures of local experts. Neural Computation, 3(1), 79\u201387.","journal-title":"Neural Computation"},{"issue":"6","key":"6_CR16","doi-asserted-by":"publisher","first-page":"733","DOI":"10.1016\/j.neuron.2009.09.003","volume":"63","author":"J. Kable","year":"2009","unstructured":"Kable, J., & Glimcher, P. (2009). The neurobiology of decision: consensus and controversy. Neuron, 63(6), 733\u2013745.","journal-title":"Neuron"},{"key":"6_CR17","volume-title":"Hierarchical learning in stochastic domains: Preliminary results. In Proceedings of the tenth international conference on machine learning (vol. 951, pp. 167\u2013173)","author":"L. P. Kaelbling","year":"1993","unstructured":"Kaelbling, L. P. (1993). Hierarchical learning in stochastic domains: Preliminary results. In Proceedings of the tenth international conference on machine learning (vol. 951, pp. 167\u2013173). Los Altos: Morgan Kaufmann."},{"key":"6_CR18","unstructured":"Karlsson, J. (1997). Learning to solve multiple goals. PhD thesis, University of Rochester."},{"key":"6_CR19","first-page":"481","volume-title":"Proceedings of the international conference on machine learning","author":"J. R. Kok","year":"2004","unstructured":"Kok, J. R., & Vlassis, N. (2004). Sparse cooperative q-learning. In Proceedings of the international conference on machine learning (pp. 481\u2013488). New York: ACM."},{"key":"6_CR20","doi-asserted-by":"publisher","first-page":"1340","DOI":"10.1038\/81887","volume":"3","author":"M. F. Land","year":"2000","unstructured":"Land, M. F., & McLeod, P. (2000). From eye movements to actions: how batsmen hit the ball. Nature Neuroscience, 3, 1340\u20131345.","journal-title":"Nature Neuroscience"},{"key":"6_CR21","first-page":"325","volume":"5","author":"S. Mannor","year":"2004","unstructured":"Mannor, S., & Shimkin, N. (2004). A geometric approach to multi-criterion reinforcement learning. The Journal of Machine Learning Research, 5, 325\u2013360.","journal-title":"The Journal of Machine Learning Research"},{"key":"6_CR22","first-page":"165","volume-title":"AAAI\/IAAI","author":"N. Meuleau","year":"1998","unstructured":"Meuleau, N., Hauskrecht, M., Kim, K.-E., Peshkin, L., Kaelbling, L., Dean, T., Boutilier, C. (1998). Solving very large weakly coupled markov decision processes. In AAAI\/IAAI (pp. 165\u2013172). Menlo Park: AAAI Press."},{"key":"6_CR23","volume-title":"The society of mind","author":"M. Minsky","year":"1988","unstructured":"Minsky, M. (1988). The society of mind. New York: Simon and Schuster."},{"issue":"8","key":"6_CR24","doi-asserted-by":"publisher","first-page":"1057","DOI":"10.1038\/nn1743","volume":"9","author":"G. Morris","year":"2006","unstructured":"Morris, G., Nevet, A., Arkadir, D., Vaadia, E., Bergman, H. (2006). Midbrain dopamine neurons encode decisions for future action. Nature Neuroscience, 9(8), 1057\u20131063.","journal-title":"Nature Neuroscience"},{"key":"6_CR25","doi-asserted-by":"crossref","first-page":"601","DOI":"10.1145\/1102351.1102427","volume-title":"Proceedings of the 22nd international conference on machine learning","author":"S. Natarajan","year":"2005","unstructured":"Natarajan, S., & Tadepalli, P. (2005). Dynamic preferences in multi-criteria reinforcement learning. In Proceedings of the 22nd international conference on machine learning (pp. 601\u2013608). New York: ACM."},{"issue":"1","key":"6_CR26","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1111\/j.1749-6632.1999.tb08538.x","volume":"882","author":"S. Pinker","year":"1999","unstructured":"Pinker, S. (1999). How the mind works. Annals of the New York Academy of Sciences, 882(1), 119\u2013127.","journal-title":"Annals of the New York Academy of Sciences"},{"key":"6_CR27","unstructured":"Ring, M. B. (1994). Continual learning in reinforcement environments. PhD thesis, University of Texas at Austin."},{"key":"6_CR28","unstructured":"Rothkopf, C. A. (2008). Modular models of task based visually guided behavior. PhD thesis, Department of Brain and Cognitive Sciences, Department of Computer Science, University of Rochester."},{"key":"6_CR29","doi-asserted-by":"crossref","unstructured":"Rothkopf, C. A., & Ballard, D. H. (2010). Credit assignment in multiple goal embodied visuomotor behavior. Frontiers in Psychology, 1, Special Issue on Embodied Cognition(00173).","DOI":"10.3389\/fpsyg.2010.00173"},{"key":"6_CR30","unstructured":"Rummery, G. A., & Niranjan, M. (1994). On-line Q-learning using connectionist systems. Technical Report CUED\/F-INFENG\/TR 166, Cambridge University Engineering Department."},{"key":"6_CR31","volume-title":"Q-decomposition for reinforcement learning agents. In Proceedings of the international conference on machine learning (vol. 20, p. 656)","author":"S. Russell","year":"2003","unstructured":"Russell, S., & Zimdars, A. L. (2003). Q-decomposition for reinforcement learning agents. In Proceedings of the international conference on machine learning (vol. 20, p. 656). Menlo Park: AAAI Press."},{"key":"6_CR32","first-page":"1063","volume":"5","author":"B. Sallans","year":"2004","unstructured":"Sallans, B., & Hinton, G. E. (2004). Reinforcement learning with factored states and actions. Journal of Machine Learning Research, 5, 1063\u20131088.","journal-title":"Journal of Machine Learning Research"},{"issue":"5752","key":"6_CR33","doi-asserted-by":"publisher","first-page":"1337","DOI":"10.1126\/science.1115270","volume":"310","author":"K. Samejima","year":"2005","unstructured":"Samejima, K., Ueda, Y., Doya, K., Kimura, M. (2005). Representation of action-specific reward values in the striatum. Science, 310(5752), 1337.","journal-title":"Science"},{"key":"6_CR34","first-page":"371","volume-title":"Proceedings of the 16th international conference on machine learning","author":"J. Schneider","year":"1999","unstructured":"Schneider, J., Wong, W.-K., Moore, A., Riedmiller, M. (1999). Distributed value functions. In Proceedings of the 16th international conference on machine learning (pp. 371\u2013378). San Francisco: Morgan Kaufmann."},{"key":"6_CR35","doi-asserted-by":"publisher","first-page":"1593","DOI":"10.1126\/science.275.5306.1593","volume":"275","author":"W. Schultz","year":"1997","unstructured":"Schultz, W., Dayan, P., Montague, P. (1997). A neural substrate of prediction and reward. Science, 275, 1593\u20131599.","journal-title":"Science"},{"key":"6_CR36","first-page":"1057","volume-title":"Neural information processing systems 10","author":"S. Singh","year":"1998","unstructured":"Singh, S., & Cohn, D. (1998). How to dynamically merge markov decision processes. In Neural information processing systems 10 (pp. 1057\u20131063). Cambridge: The MIT Press."},{"key":"6_CR37","first-page":"1445","volume-title":"International joint conference on artificial intelligence","author":"N. Sprague","year":"2003","unstructured":"Sprague, N., & Ballard, D. (2003). Multiple-goal reinforcement learning with modular sarsa(0). In International joint conference on artificial intelligence (pp. 1445\u20131447). Morgan Kaufmann: Acapulco."},{"issue":"2","key":"6_CR38","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1145\/1265957.1265960","volume":"4","author":"N. Sprague","year":"2007","unstructured":"Sprague, N., Ballard, D., Robinson, A. (2007). Modeling embodied visual behaviors. ACM Transactions on Applied Perception, 4(2), 11.","journal-title":"ACM Transactions on Applied Perception"},{"key":"6_CR39","volume-title":"Reinforcement learning: an introduction","author":"R. S. Sutton","year":"1998","unstructured":"Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: an introduction. Cambridge: MIT."},{"key":"6_CR40","volume-title":"Scalable reinforcement learning through hierarchical decompositions for weakly-coupled problems. In 2011 IEEE 10th international conference on development and learning (ICDL) (Vol. 2, pp. 1\u20137)","author":"H. Toutounji","year":"2011","unstructured":"Toutounji, H., Rothkopf, C. A., Triesch, J. (2011). Scalable reinforcement learning through hierarchical decompositions for weakly-coupled problems. In 2011 IEEE 10th international conference on development and learning (ICDL) (Vol. 2, pp. 1\u20137). New York: IEEE."},{"key":"6_CR41","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1016\/0010-0277(84)90023-4","volume":"18","author":"S. Ullman","year":"1984","unstructured":"Ullman, S. (1984). Visual routines. Cognition, 18, 97\u2013157.","journal-title":"Cognition"},{"key":"6_CR42","volume-title":"Theory of games and economic behavior","author":"J. Von Neumann","year":"1947","unstructured":"Von Neumann, J., Morgenstern, O., Rubinstein, A., Kuhn, H. (1947). Theory of games and economic behavior. Princeton: Princeton University Press."},{"key":"6_CR43","unstructured":"Watkins, C. J. (1989). Learning from delayed rewards. PhD thesis, University of Cambridge."},{"key":"6_CR44","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4899-5379-7","volume-title":"Eye movements and vision","author":"A. Yarbus","year":"1967","unstructured":"Yarbus, A. (1967). Eye movements and vision. New York: Plenum Press."}],"container-title":["Computational and Robotic Models of the Hierarchical Organization of Behavior"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-39875-9_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,19]],"date-time":"2024-05-19T11:56:59Z","timestamp":1716119819000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-39875-9_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013]]},"ISBN":["9783642398742","9783642398759"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-39875-9_6","relation":{},"subject":[],"published":{"date-parts":[[2013]]},"assertion":[{"value":"28 September 2013","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}