{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,22]],"date-time":"2026-03-22T05:45:33Z","timestamp":1774158333973,"version":"3.50.1"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2013,7,6]],"date-time":"2013-07-06T00:00:00Z","timestamp":1373068800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Biol Cybern"],"published-print":{"date-parts":[[2013,8]]},"DOI":"10.1007\/s00422-013-0562-6","type":"journal-article","created":{"date-parts":[[2013,7,5]],"date-time":"2013-07-05T11:19:39Z","timestamp":1373023179000},"page":"477-490","source":"Crossref","is-referenced-by-count":25,"title":["Modular inverse reinforcement learning for visuomotor behavior"],"prefix":"10.1007","volume":"107","author":[{"given":"Constantin A.","family":"Rothkopf","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dana H.","family":"Ballard","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2013,7,6]]},"reference":[{"issue":"3","key":"562_CR1","doi-asserted-by":"crossref","first-page":"628","DOI":"10.1037\/0033-295X.113.3.628","volume":"113","author":"HC Barrett","year":"2006","unstructured":"Barrett HC, Kurzban R (2006) Modularity in cognition: framing the debate. Psychol Rev 113(3):628","journal-title":"Psychol Rev"},{"key":"562_CR2","first-page":"215","volume-title":"Models of information processing in the basal ganglia","author":"AC Barto","year":"1995","unstructured":"Barto AC (1995) Adaptive critics and the basal ganglia. In: Houk JC, Davis JL, Beiser DG (eds) Models of information processing in the basal ganglia. MIT Press, Cambridge, MA, pp 215\u2013232"},{"key":"562_CR3","doi-asserted-by":"crossref","first-page":"145","DOI":"10.1016\/S0921-8890(01)00155-5","volume":"37","author":"A Billard","year":"2001","unstructured":"Billard A, Mataric MJ (2001) Learning human arm movements by imitation: evaluation of a biologically inspired connectionist architecture. Robotics Auton Syst 37:145\u2013160","journal-title":"Robotics Auton Syst"},{"key":"562_CR4","doi-asserted-by":"crossref","first-page":"815","DOI":"10.1016\/j.neuron.2010.11.022","volume":"68","author":"ES Bromberg-Martin","year":"2010","unstructured":"Bromberg-Martin ES, Matsumoto M, Hikosaka O (2010) Dopamine in motivational control: Rewarding, aversive, and alerting. Neuron 68:815\u2013834","journal-title":"Neuron"},{"key":"562_CR5","doi-asserted-by":"crossref","unstructured":"Brooks R (1986) A robust layered control system for a mobile robot. IEEE J Robotics Autom 2(1):14\u201323","DOI":"10.1109\/JRA.1986.1087032"},{"key":"562_CR6","volume-title":"Advances in neural information processing systems 16","author":"Y-H Chang","year":"2004","unstructured":"Chang Y-H, Ho T, Kaelbling LP (2004) All learning is local: multi-agent learning in global reward games. In: Thrun S, Saul L, Sch\u00f6lkopf B (eds) Advances in neural information processing systems 16. MIT Press, Cambridge, MA"},{"key":"562_CR7","doi-asserted-by":"crossref","unstructured":"Daw ND, O\u2019Doherty JP, Dayan P, Seymour B, Dolan RJ (2006) Cortical substrates for exploratory decisions in humans. Nature 441(7095): 876\u2013879. ISSN 1476\u20134687. doi: 10.1038\/nature04766 . URL http:\/\/www.ncbi.nlm.nih.gov\/pubmed\/16778890","DOI":"10.1038\/nature04766"},{"issue":"2","key":"562_CR8","doi-asserted-by":"crossref","first-page":"199","DOI":"10.1016\/j.conb.2006.03.006","volume":"16","author":"ND Daw","year":"2006","unstructured":"Daw ND, Doya K (2006) The computational neurobiology of learning and reward. Curr Opin Neurobiol 16(2):199\u2013204","journal-title":"Curr Opin Neurobiol"},{"key":"562_CR9","unstructured":"Dayan P, Hinton GE (1992) Feudal reinforcement learning. In: Advances in neural information processing systems 5. Morgan Kaufmann Publishers, Burlington, pp 271\u2013271"},{"key":"562_CR10","doi-asserted-by":"crossref","unstructured":"Dimitrakakis C, Rothkopf CA (2011) Bayesian multitask inverse reinforcement learning. In: European workshop on reinforcemnt learning (EWRL)","DOI":"10.1007\/978-3-642-29946-9_27"},{"issue":"2","key":"562_CR11","doi-asserted-by":"crossref","first-page":"343","DOI":"10.1037\/0096-1523.29.2.343","volume":"29","author":"BR Fajen","year":"2003","unstructured":"Fajen BR, Warren WH (2003) Behavioral dynamics of steering, obstable avoidance, and route selection. J Exp Psychol Hum Percept Perform 29(2):343","journal-title":"J Exp Psychol Hum Percept Perform"},{"key":"562_CR12","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/4737.001.0001","volume-title":"Modularity of mind","author":"JA Fodor","year":"1983","unstructured":"Fodor JA (1983) Modularity of mind. MIT Press, Cambridge, MA"},{"issue":"43","key":"562_CR13","doi-asserted-by":"crossref","first-page":"13524","DOI":"10.1523\/JNEUROSCI.2469-09.2009","volume":"29","author":"SJ Gershman","year":"2009","unstructured":"Gershman SJ, Pesaran B, Daw ND (2009) Human reinforcement learning subdivides structured action spaces by learning effector-specific values. J Neurosci 29(43):13524\u201313531","journal-title":"J Neurosci"},{"key":"562_CR14","volume-title":"Decisions, uncertainty, and the brain: the science of neuroeconomics","author":"PW Glimcher","year":"2004","unstructured":"Glimcher PW (2004) Decisions, uncertainty, and the brain: the science of neuroeconomics. MIT Press, Bradford Books, Cambridge, MA"},{"key":"562_CR15","doi-asserted-by":"crossref","unstructured":"Gold JI, Shadlen MN (2007) The neural basis of decision making. Annu Rev Neurosci 30(1):535\u2013574. ISSN 0147\u2013006X. doi: 10.1146\/annurev.neuro.29.051605.113038","DOI":"10.1146\/annurev.neuro.29.051605.113038"},{"issue":"5180","key":"562_CR16","doi-asserted-by":"crossref","first-page":"1826","DOI":"10.1126\/science.8091209","volume":"265","author":"AM Graybiel","year":"1994","unstructured":"Graybiel AM, Aosaki T, Flaherty AW, Kimura M (1994) The basal ganglia and adaptive motor control. Science 265(5180):1826\u20131831","journal-title":"Science"},{"issue":"4","key":"562_CR17","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1016\/j.jchemneu.2003.10.003","volume":"26","author":"SN Haber","year":"2003","unstructured":"Haber SN (2003) The primate basal ganglia: parallel and integrative networks. J Chem Neuroanat 26(4):317\u2013330","journal-title":"J Chem Neuroanat"},{"key":"562_CR18","doi-asserted-by":"crossref","first-page":"135","DOI":"10.7551\/mitpress\/3118.003.0018","volume-title":"From animals to animats 4: proceedings of the fourth international conference on simulation of adaptive behavior","author":"M Humphrys","year":"1996","unstructured":"Humphrys M (1996) Action selection methods using reinforcement learning. In: Maes P, Mataric M, Meyer J-A, Pollack J, Wilson SW (eds) From animals to animats 4: proceedings of the fourth international conference on simulation of adaptive behavior. MIT Press, Bradford Books, Cambridge, MA, pp 135\u2013144"},{"key":"562_CR19","doi-asserted-by":"crossref","unstructured":"Kaelbling LP (1993) Hierarchical learning in stochastic domains: preliminary results. In: Proceedings of the tenth international conference on machine learning, vol 951, pp 167\u2013173","DOI":"10.1016\/B978-1-55860-307-3.50028-9"},{"issue":"1","key":"562_CR20","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1023\/A:1011215321374","volume":"20","author":"YJ Lee","year":"2001","unstructured":"Lee YJ, Mangasarian OL (2001) Ssvm: a smooth support vector machine for classification. Comput Optim Appl 20(1):5\u201322","journal-title":"Comput Optim Appl"},{"key":"562_CR21","doi-asserted-by":"crossref","unstructured":"Lopes M, Melo F, Montesano L (2009) Active learning for reward estimation in inverse reinforcement learning. In: Buntine W, Grobelnik M, Mladeni\u0107 D, Shawe-Taylor J (eds) Machine learning and knowledge discovery in databases. Lecture notes in computer science, vol 5782. Springer, Berlin, Heidelberg, pp 31\u201346. http:\/\/dx.doi.org\/10.1007\/978-3-642-04174-7_3","DOI":"10.1007\/978-3-642-04174-7_3"},{"key":"562_CR22","doi-asserted-by":"crossref","unstructured":"Minsky M (1988) The society of mind. Simon and Schuster","DOI":"10.21236\/ADA200313"},{"key":"562_CR23","doi-asserted-by":"crossref","first-page":"1936","DOI":"10.1523\/JNEUROSCI.16-05-01936.1996","volume":"16","author":"PR Montague","year":"1996","unstructured":"Montague PR, Dayan P, Sejnowski TJ (1996) framework for mesencephalic dopamine systems based on predictive hebbian learning. J Neurosci 16:1936\u20131947","journal-title":"J Neurosci"},{"key":"562_CR24","unstructured":"Neu G, Szepesv\u00e1ri C (2007) Apprenticeship learning using inverse reinforcement learning and gradient methods. In: Proceedings of the 23 conference on uncertainty in, artificial intelligence, pp 295\u2013302"},{"key":"562_CR25","unstructured":"Ng AY, Russell S (2000) Algorithms for inverse reinforcement learning. In: Proceedings 17th international conference on machine learning, Morgan Kaufmann, pp 663\u2013670"},{"key":"562_CR26","doi-asserted-by":"crossref","unstructured":"Pastor P, Hoffmann H, Asfour T, Schaal S (2009) Learning and generalization of motor skills by learning from demonstration. In: International conference on robotics and automation","DOI":"10.1109\/ROBOT.2009.5152385"},{"key":"562_CR27","doi-asserted-by":"crossref","unstructured":"Pinker SA (1999) How the mind works. Ann N Y Acad Sci 882(1):119\u2013127","DOI":"10.1111\/j.1749-6632.1999.tb08538.x"},{"key":"562_CR28","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","volume-title":"Markov decision processes","author":"ML Puterman","year":"1994","unstructured":"Puterman ML (1994) Markov decision processes. Wiley, New York, NY"},{"key":"562_CR29","unstructured":"Ramachandran D, Amir E (2007) Bayesian inverse reinforcement learning. In: 20th internatinal joint conference artificial intelligence"},{"key":"562_CR30","unstructured":"Rothkopf CA (2008) Modular models of task based visually guided behavior. PhD thesis, Department of Brain and Cognitive Sciences, Department of Computer Science, University of Rochester"},{"key":"562_CR31","doi-asserted-by":"crossref","unstructured":"Rothkopf CA, Ballard DH (2010) Credit assignment in multiple goal embodied visuomotor behavior. Frontiers in Psychology, 1, Special Issue on Embodied, Cognition (00173)","DOI":"10.3389\/fpsyg.2010.00173"},{"key":"562_CR32","unstructured":"Rothkopf CA, Dimitrakakis C (2001) Preference elicitation and inverse reinforcement learning. In: 22nd European conference on machine learning (ECML)"},{"key":"562_CR33","unstructured":"Rummery GA, Niranjan M (1994) On-line Q-learning using connectionist systems. Technical report CUED\/F-INFENG\/TR 166, Cambridge University Engineering Department"},{"key":"562_CR34","unstructured":"Russell S, Zimdars AL (2003) Q-decomposition for reinforcement learning agents. In: Proceedings of the international conference on machine learning, vol 20, p 656"},{"issue":"5752","key":"562_CR35","doi-asserted-by":"crossref","first-page":"1337","DOI":"10.1126\/science.1115270","volume":"310","author":"K Samejima","year":"2005","unstructured":"Samejima K, Ueda Y, Doya K, Kimura M (2005) Representation of action-specific reward values in the striatum. Science 310(5752):1337","journal-title":"Science"},{"key":"562_CR36","doi-asserted-by":"crossref","unstructured":"Schmidt M, Fung G, Rosales R (2007) Fast optimization methods for l1 regularization: a comparative study and two new approaches. In: Kok J, Koronacki J, Mantaras R, Matwin S, Mladenic D, Skowron A (eds) Machine learning: ECML 2007, volume 4701 of Lecture notes in computer science, Springer, Berlin, 2007, pp 286\u2013297. ISBN 978-3-540-74957-8","DOI":"10.1007\/978-3-540-74958-5_28"},{"issue":"4","key":"562_CR37","doi-asserted-by":"crossref","first-page":"253","DOI":"10.1016\/0921-8890(92)90004-I","volume":"10","author":"G Sch\u00f6ner","year":"1992","unstructured":"Sch\u00f6ner G, Dose M (1992) A dynamical systems approach to task-level system integration used to plan and control autonomous vehicle motion. Robotics Auton Syst 10(4):253\u2013267","journal-title":"Robotics Auton Syst"},{"key":"562_CR38","doi-asserted-by":"crossref","first-page":"1593","DOI":"10.1126\/science.275.5306.1593","volume":"275","author":"W Schultz","year":"1997","unstructured":"Schultz W, Dayan P, Montague PR (1997) A neural substrate of prediction and reward. Science 275:1593\u20131599","journal-title":"Science"},{"issue":"6992","key":"562_CR39","doi-asserted-by":"crossref","first-page":"664","DOI":"10.1038\/nature02581","volume":"429","author":"B Seymour","year":"2004","unstructured":"Seymour B, O\u2019Doherty JP, Dayan P, Koltzenburg M, Jones AK, Dolan RJ, Friston KJ, Frackowiak RS (2004) Temporal difference models describe higher-order learning in humans. Nature 429(6992):664\u2013667","journal-title":"Nature"},{"key":"562_CR40","unstructured":"Singh S, Cohn D (1998) How to dynamically merge Markov decision processes. In: Neural information processing systems 10, pp 1057\u20131063"},{"key":"562_CR41","unstructured":"Sprague N, Ballard D (2003) Multiple-goal reinforcement learning with modular sarsa(0). In: International joint conference on artificial intelligence, Acapulco, August 2003"},{"issue":"2","key":"562_CR42","doi-asserted-by":"crossref","first-page":"11","DOI":"10.1145\/1265957.1265960","volume":"4","author":"N Sprague","year":"2007","unstructured":"Sprague N, Ballard DH (2007) Modeling embodied visual behaviors. ACM Trans Appl Percept 4(2):11","journal-title":"ACM Trans Appl Percept"},{"key":"562_CR43","first-page":"9","volume":"3","author":"RS Sutton","year":"1988","unstructured":"Sutton RS (1988) Learning to predict by the methods of temporal differences. Mach Learn 3:9\u201344","journal-title":"Mach Learn"},{"key":"562_CR44","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: an introduction. MIT Press, Cambridge, MA"},{"key":"562_CR45","volume-title":"Theory of games and economic behavior","author":"J Neumann Von","year":"1947","unstructured":"Von Neumann J, Morgenstern O, Rubinstein A, Kuhn HW (1947) Theory of games and economic behavior. Princeton University Press, Princeton, NJ"},{"key":"562_CR46","unstructured":"Whitehead SD (1991) A complexity analysis of cooperative mechanisms in reinforcement learning. In: Proceedings of the association for artificial intelligence"},{"key":"562_CR47","first-page":"45","volume":"7","author":"SD Whitehead","year":"1991","unstructured":"Whitehead SD, Ballard DH (1991) Learning to perceive and act by trial and error. Mach Learn 7:45\u201383","journal-title":"Mach Learn"},{"key":"562_CR48","unstructured":"Ziebart BD, Bagnell JA, Dey AK (2010) Modeling interaction via the principle of maximum causal entropy. In: Johannes F, Thorsten J (eds) Proceedings of the 27th international conference on machine learning (ICML-10), June 21\u201324, 2010. Haifa, Israel, pp 1255\u20131262"}],"container-title":["Biological Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00422-013-0562-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00422-013-0562-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00422-013-0562-6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T23:16:32Z","timestamp":1715642192000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00422-013-0562-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,7,6]]},"references-count":48,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2013,8]]}},"alternative-id":["562"],"URL":"https:\/\/doi.org\/10.1007\/s00422-013-0562-6","relation":{},"ISSN":["0340-1200","1432-0770"],"issn-type":[{"value":"0340-1200","type":"print"},{"value":"1432-0770","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013,7,6]]}}}