{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,8]],"date-time":"2026-07-08T17:21:04Z","timestamp":1783531264401,"version":"3.55.0"},"reference-count":80,"publisher":"Springer Science and Business Media LLC","issue":"8-9","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Biol Cybern"],"published-print":{"date-parts":[[2012,10]]},"DOI":"10.1007\/s00422-012-0512-8","type":"journal-article","created":{"date-parts":[[2012,8,2]],"date-time":"2012-08-02T13:50:57Z","timestamp":1343915457000},"page":"523-541","source":"Crossref","is-referenced-by-count":184,"title":["Active inference and agency: optimal control without cost functions"],"prefix":"10.1007","volume":"106","author":[{"given":"Karl","family":"Friston","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Spyridon","family":"Samothrakis","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Read","family":"Montague","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2012,8,3]]},"reference":[{"key":"512_CR1","doi-asserted-by":"crossref","first-page":"125","DOI":"10.1080\/00221309.1947.9918144","volume":"37","author":"WR Ashby","year":"1947","unstructured":"Ashby WR (1947) Principles of the self-organizing dynamic system. J Gen Psychol 37: 125\u2013128","journal-title":"J Gen Psychol"},{"issue":"7","key":"512_CR2","doi-asserted-by":"crossref","first-page":"3228","DOI":"10.1073\/pnas.0911531107","volume":"107","author":"N Axmacher","year":"2010","unstructured":"Axmacher N, Henseler MM, Jensen O, Weinreich I, Elger CE, Fell J (2010) Cross-frequency coupling supports multi-item working memory in the human hippocampus. Proc Natl Acad Sci 107(7): 3228\u20133233","journal-title":"Proc Natl Acad Sci"},{"key":"512_CR3","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1613\/jair.807","volume":"15","author":"J Baxter","year":"2001","unstructured":"Baxter J, Bartlett PL, Weaver L (2001) Experiments with Infinite- Horizon, Policy-Gradient Estimation. J Artif Intell Res 15: 351\u2013381","journal-title":"J Artif Intell Res"},{"key":"512_CR4","unstructured":"Beal MJ (2003) Variational algorithms for approximate bayesian inference\u2019. PhD. Thesis, University College London, London"},{"key":"512_CR5","doi-asserted-by":"crossref","first-page":"716","DOI":"10.1073\/pnas.38.8.716","volume":"38","author":"R Bellman","year":"1952","unstructured":"Bellman R (1952) On the theory of dynamic programming. Proc Natl Acad Sci USA 38: 716\u2013719","journal-title":"Proc Natl Acad Sci USA"},{"issue":"2","key":"512_CR6","doi-asserted-by":"crossref","first-page":"179","DOI":"10.1016\/j.physbeh.2004.02.004","volume":"81","author":"KC Berridge","year":"2004","unstructured":"Berridge KC (2004) Motivation concepts in behavioral neuroscience. Physiol Behav 81(2): 179\u2013209","journal-title":"Physiol Behav"},{"key":"512_CR7","doi-asserted-by":"crossref","first-page":"656","DOI":"10.1073\/pnas.17.12.656","volume":"17","author":"GD Birkhoff","year":"1931","unstructured":"Birkhoff GD (1931) Proof of the ergodic theorem. Proc Natl Acad Sci USA 17: 656\u2013660","journal-title":"Proc Natl Acad Sci USA"},{"key":"512_CR8","unstructured":"Botvinick MM, An J (2008) Goal-directed decision making in prefrontal cortex: a computational framework. Adv Neural Inf Process Syst (NIPS) 21"},{"key":"512_CR9","doi-asserted-by":"crossref","unstructured":"Braun DA, Ortega P, Theodorou E, Schaal S (2011) Path integral control and bounded rationality. In: ADPRL 2011, Paris","DOI":"10.1109\/ADPRL.2011.5967366"},{"issue":"6","key":"512_CR10","doi-asserted-by":"crossref","first-page":"1289","DOI":"10.1214\/aos\/1176345645","volume":"9","author":"LD Brown","year":"1981","unstructured":"Brown LD (1981) A complete class theorem for statistical problems with finite sample spaces. Ann Stat 9(6): 1289\u20131300","journal-title":"Ann Stat"},{"issue":"5","key":"512_CR11","doi-asserted-by":"crossref","first-page":"225","DOI":"10.1016\/S1364-6613(03)00094-9","volume":"7","author":"CF Camerer","year":"2003","unstructured":"Camerer CF (2003) Behavioural studies of strategic thinking in games. Trends Cogn Sci 7(5): 225\u2013231","journal-title":"Trends Cogn Sci"},{"issue":"5793","key":"512_CR12","doi-asserted-by":"crossref","first-page":"1626","DOI":"10.1126\/science.1128115","volume":"313","author":"RT Canolty","year":"2006","unstructured":"Canolty RT, Edwards E, Dalal SS, Soltani M, Nagarajan SS, Kirsch HE, Berger MS, Barbaro NM, Knight R (2006) High gamma power is phase-locked to theta oscillations in human neocortex. Science 313(5793): 1626\u20131628","journal-title":"Science"},{"key":"512_CR13","unstructured":"Cooper G (1988) A method for using belief networks as influence diagrams. In: Proceedings of the Conference on uncertainty in artificial intelligence"},{"issue":"2","key":"512_CR14","doi-asserted-by":"crossref","first-page":"199","DOI":"10.1016\/j.conb.2006.03.006","volume":"16","author":"ND Daw","year":"2006","unstructured":"Daw ND, Doya K (2006) The computational neurobiology of learning and reward. Curr Opin Neurobiol 16(2): 199\u2013204","journal-title":"Curr Opin Neurobiol"},{"issue":"4","key":"512_CR15","doi-asserted-by":"crossref","first-page":"429","DOI":"10.3758\/CABN.8.4.429","volume":"8","author":"P Dayan","year":"2008","unstructured":"Dayan P, Daw ND (2008) Decision theory, reinforcement learning, and the brain. Cogn Affect Behav Neurosci 8(4): 429\u2013453","journal-title":"Cogn Affect Behav Neurosci"},{"key":"512_CR16","doi-asserted-by":"crossref","first-page":"271","DOI":"10.1162\/neco.1997.9.2.271","volume":"9","author":"P Dayan","year":"1997","unstructured":"Dayan P, Hinton GE (1997) Using expectation maximization for reinforcement learning. Neural Comput 9: 271\u2013278","journal-title":"Neural Comput"},{"key":"512_CR17","doi-asserted-by":"crossref","first-page":"889","DOI":"10.1162\/neco.1995.7.5.889","volume":"7","author":"P Dayan","year":"1995","unstructured":"Dayan P, Hinton GE, Neal R (1995) The Helmholtz machine. Neural Comput 7: 889\u2013904","journal-title":"Neural Comput"},{"key":"512_CR18","unstructured":"Duff M, (2002) Optimal learning: computational procedure for bayes-adaptive markov decision processes. PhD thesis. University of Massachusetts, Amherst"},{"key":"512_CR19","doi-asserted-by":"crossref","first-page":"15551","DOI":"10.1080\/0026897031000085173","volume":"101","author":"DJ Evans","year":"2003","unstructured":"Evans DJ (2003) A non-equilibrium free energy theorem for deterministic systems. Mol Phys 101:15551\u201315554","journal-title":"Mol Phys"},{"issue":"9","key":"512_CR20","first-page":"874","volume":"21","author":"AA Feldbaum","year":"1961","unstructured":"Feldbaum AA (1961) Dual control theory, Part I. Autom Remote Control 21(9): 874\u2013880","journal-title":"Autom Remote Control"},{"key":"512_CR21","doi-asserted-by":"crossref","first-page":"215","DOI":"10.3389\/fnhum.2010.00215","volume":"4","author":"H Feldman","year":"2010","unstructured":"Feldman H, Friston KJ (2010) Attention, uncertainty, and free-energy. Front Hum Neurosci 4: 215","journal-title":"Front Hum Neurosci"},{"key":"512_CR22","volume-title":"Statistical mechanics","author":"RP Feynman","year":"1972","unstructured":"Feynman RP (1972) Statistical mechanics. Benjamin, Reading MA"},{"key":"512_CR23","doi-asserted-by":"crossref","DOI":"10.1007\/b96083","volume-title":"Adaptive dual control: theory and applications (lecture notes in control and information sciences","author":"N Filatov","year":"2004","unstructured":"Filatov N, Unbehauen H (2004) Adaptive dual control: theory and applications (lecture notes in control and information sciences. Springer, Berlin"},{"key":"512_CR24","doi-asserted-by":"crossref","unstructured":"Fox C, Roberts S (2011) A tutorial on variational Bayes. In: Artificial intelligence review. Spinger, Berlin","DOI":"10.1007\/s10462-011-9236-8"},{"issue":"11","key":"512_CR25","doi-asserted-by":"crossref","first-page":"e1000211","DOI":"10.1371\/journal.pcbi.1000211","volume":"4","author":"K Friston","year":"2008","unstructured":"Friston K (2008) Hierarchical models in the brain. PLoS Comput Biol 4(11): e1000211","journal-title":"PLoS Comput Biol"},{"issue":"2","key":"512_CR26","doi-asserted-by":"crossref","first-page":"127","DOI":"10.1038\/nrn2787","volume":"11","author":"K Friston","year":"2010","unstructured":"Friston K (2010) The free-energy principle: a unified brain theory?. Nat Rev Neurosci 11(2): 127\u2013138","journal-title":"Nat Rev Neurosci"},{"issue":"3","key":"512_CR27","doi-asserted-by":"crossref","first-page":"488","DOI":"10.1016\/j.neuron.2011.10.018","volume":"72","author":"K Friston","year":"2011","unstructured":"Friston K (2011) What is optimal about motor control?. Neuron 72(3): 488\u2013498","journal-title":"Neuron"},{"key":"512_CR28","doi-asserted-by":"crossref","unstructured":"Friston K, Ao P (2012) Free-energy, value and attractors. In: Computational and mathematical methods in medicine, vol 2012","DOI":"10.1155\/2012\/937860"},{"issue":"8","key":"512_CR29","doi-asserted-by":"crossref","first-page":"1093","DOI":"10.1016\/j.neunet.2009.07.023","volume":"22","author":"K Friston","year":"2009","unstructured":"Friston K, Kiebel S (2009) Cortical circuits for perceptual inference. Neural Netw 22(8): 1093\u20131104","journal-title":"Neural Netw"},{"issue":"1521","key":"512_CR30","doi-asserted-by":"crossref","first-page":"1211","DOI":"10.1098\/rstb.2008.0300","volume":"364","author":"K Friston","year":"2009","unstructured":"Friston K, Kiebel S (2009) Predictive coding under the free-energy principle. Philos Trans R Soc Lond B Biol Sci 364(1521): 1211\u20131221","journal-title":"Philos Trans R Soc Lond B Biol Sci"},{"issue":"7","key":"512_CR31","doi-asserted-by":"crossref","first-page":"e6421","DOI":"10.1371\/journal.pone.0006421","volume":"4","author":"KJ Friston","year":"2009","unstructured":"Friston KJ, Daunizeau J, Kiebel SJ (2009) Active inference or reinforcement learning?. PLoS One 4(7): e6421","journal-title":"PLoS One"},{"issue":"3","key":"512_CR32","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1007\/s00422-010-0364-z","volume":"102","author":"KJ Friston","year":"2010","unstructured":"Friston KJ, Daunizeau J, Kilner J, Kiebel SJ (2010) Action and behavior: a free-energy formulation. Biol Cybern 102(3): 227\u2013260","journal-title":"Biol Cybern"},{"issue":"1","key":"512_CR33","doi-asserted-by":"crossref","first-page":"e1002327","DOI":"10.1371\/journal.pcbi.1002327","volume":"8","author":"KJST Friston","year":"2012","unstructured":"Friston KJST, Fitzgerald T, Galea JM, Adams R, Brown H, Dolan RJ, Moran R, Stephan KE, Bestmann S (2012) Dopamine, affordance and active inference. PLoS Comput Biol 8(1): e1002327","journal-title":"PLoS Comput Biol"},{"issue":"1\u20133","key":"512_CR34","doi-asserted-by":"crossref","first-page":"70","DOI":"10.1016\/j.jphysparis.2006.10.001","volume":"100","author":"K Friston","year":"2006","unstructured":"Friston K, Kilner J, Harrison L (2006) A free energy principle for the brain. J Physiol Paris 100(1\u20133): 70\u201387","journal-title":"J Physiol Paris"},{"key":"512_CR35","doi-asserted-by":"crossref","first-page":"137","DOI":"10.1007\/s00422-011-0424-z","volume":"104","author":"K Friston","year":"2011","unstructured":"Friston K, Mattout J, Kilner J (2011) Action understanding and active inference. Biol Cybern 104: 137\u2013160","journal-title":"Biol Cybern"},{"issue":"2","key":"512_CR36","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1016\/0306-4522(94)90592-4","volume":"59","author":"KJ Friston","year":"1994","unstructured":"Friston KJ, Tononi G, Reeke GNJ, Sporns O, Edelman GM (1994) Value-dependent selection in the brain: simulation in a synthetic neural model. Neuroscience 59(2):229\u2013243","journal-title":"Neuroscience"},{"key":"512_CR37","doi-asserted-by":"crossref","first-page":"451","DOI":"10.1146\/annurev-psych-120709-145346","volume":"62","author":"G Gigerenzer","year":"2011","unstructured":"Gigerenzer G, Gaissmaier W (2011) Heuristic decision making. Annu Rev Psychol 62: 451\u2013482","journal-title":"Annu Rev Psychol"},{"issue":"4","key":"512_CR38","doi-asserted-by":"crossref","first-page":"585","DOI":"10.1016\/j.neuron.2010.04.016","volume":"66","author":"J Gl\u00e4scher","year":"2010","unstructured":"Gl\u00e4scher J, Daw N, Dayan P, O\u2019Doherty JP (2010) States versus rewards: dissociable neural prediction error signals underlying model-based and model-free reinforcement learning. Neuron 66(4): 585\u2013595","journal-title":"Neuron"},{"key":"512_CR39","unstructured":"Gomez F, Miikkulainen R (2001) Learning robust nonlinear control with neuroevolution. Technical Report AI01-292, Department of Computer Sciences, The University of Texas at Austin"},{"key":"512_CR40","first-page":"937","volume":"9","author":"F Gomez","year":"2009","unstructured":"Gomez F, Schmidhuber J, Miikkulainen R (2009) Accelerated neural evolution through cooperatively coevolved synapses. J Mach Learn Res 9: 937\u2013965","journal-title":"J Mach Learn Res"},{"key":"512_CR41","unstructured":"Helmholtz H (1866\/1962), Concerning the perceptions in general. In: Treatise on physiological optics, 3rd edn. Dover, New York"},{"key":"512_CR42","unstructured":"Hinton GE, van Camp D (1993) Keeping neural networks simple by minimizing the description length of weights. In: Proceedings of COLT-93,pp 5\u201313"},{"key":"512_CR43","unstructured":"Hoffman, M, de Freitas, N, Doucet, A, Peters J (2009) An expectation maximization algorithm for continuous markov decision processes with arbitrary rewards. In: Twelfth Int. Conf. on artificial intelligence and statistics (AISTATS 2009)"},{"key":"512_CR44","volume-title":"Dynamic programming and Markov processes","author":"RA Howard","year":"1960","unstructured":"Howard RA (1960) Dynamic programming and Markov processes. MIT Press Cambridge, MA"},{"key":"512_CR45","doi-asserted-by":"crossref","first-page":"1371","DOI":"10.1162\/089976600300015411","volume":"12","author":"H Jaeger","year":"2000","unstructured":"Jaeger H (2000) Observable operator models for discrete stochastic time series. Neural Comput 12: 1371\u20131398","journal-title":"Neural Comput"},{"key":"512_CR46","doi-asserted-by":"crossref","unstructured":"Jensen F, Jensen V, Dittmer SL (1994) From influence diagrams to junction trees. In: Proc. of the Tenth Conference on uncertainty in artificial intelligence. Morgan Kaufmann, San Fransisco","DOI":"10.1016\/B978-1-55860-332-5.50051-1"},{"issue":"1\u20132","key":"512_CR47","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1016\/S0004-3702(98)00023-X","volume":"101","author":"LP Kaelbling","year":"1998","unstructured":"Kaelbling LP, Littman ML, Cassandra AR (1998) Planning and acting in partially observable stochastic domains. Artif Intell 101 (1\u20132): 99\u2013134","journal-title":"Artif Intell"},{"issue":"20","key":"512_CR48","doi-asserted-by":"crossref","first-page":"200201","DOI":"10.1103\/PhysRevLett.95.200201","volume":"95","author":"HJ Kappen","year":"2005","unstructured":"Kappen HJ (2005) Linear theory for control of nonlinear stochastic systems. Phys Rev Lett 95(20): 200201","journal-title":"Phys Rev Lett"},{"key":"512_CR49","doi-asserted-by":"crossref","first-page":"P11011","DOI":"10.1088\/1742-5468\/2005\/11\/P11011","volume":"11","author":"HJ Kappen","year":"2005","unstructured":"Kappen HJ (2005) Path integrals and symmetry breaking for optimal control theory. J Stat Mech: Theory Exp 11: P11011","journal-title":"J Stat Mech: Theory Exp"},{"key":"512_CR50","unstructured":"Kappen HJ, Gomez Y, Opper M (2009) Optimal control as a graphical model inference problem. arXiv:0901.0633v2"},{"key":"512_CR51","doi-asserted-by":"crossref","first-page":"20","DOI":"10.3389\/neuro.11.020.2009","volume":"3","author":"SJ Kiebel","year":"2009","unstructured":"Kiebel SJ, Daunizeau J, Friston KJ (2009a) Perception and hierarchical dynamics. Front Neuroinf 3: 20","journal-title":"Front Neuroinf"},{"key":"512_CR52","doi-asserted-by":"crossref","unstructured":"Kiebel SJ, von Kriegstein K, Daunizeau J, Friston KJ (2009b) Recognizing sequences of sequences. PLoS Comput Biol 5(8):e1000464","DOI":"10.1371\/journal.pcbi.1000464"},{"issue":"4","key":"512_CR53","doi-asserted-by":"crossref","first-page":"543","DOI":"10.1016\/j.neuron.2010.07.021","volume":"67","author":"KT Kishida","year":"2010","unstructured":"Kishida KT, King-Casas B, Montague PR (2010) Neuroeconomic approaches to mental disorders. Neuron 67(4): 543\u2013554","journal-title":"Neuron"},{"issue":"3","key":"512_CR54","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1023\/A:1017584715408","volume":"27","author":"ML Littman","year":"2001","unstructured":"Littman ML, Majercik SM, Pitassi T (2001) Stochastic boolean satisfiability. J Autom Reason 27(3): 251\u2013296","journal-title":"J Autom Reason"},{"key":"512_CR55","unstructured":"Littman ML, Sutton RS, Singh S (2002) Predictive Representations of State. Adv Neural Inf Process Syst 14"},{"key":"512_CR56","doi-asserted-by":"crossref","first-page":"445","DOI":"10.1049\/el:19950331","volume":"31","author":"DJ MacKay","year":"1995","unstructured":"MacKay DJ (1995) Free-energy minimisation algorithm for decoding and cryptoanalysis. Electron Lett 31: 445\u2013447","journal-title":"Electron Lett"},{"issue":"6551","key":"512_CR57","doi-asserted-by":"crossref","first-page":"725","DOI":"10.1038\/377725a0","volume":"377","author":"PR Montague","year":"1995","unstructured":"Montague PR, Dayan P, Person C, Sejnowski TJ (1995) Bee foraging in uncertain environments using predictive Hebbian learning. Nature 377(6551): 725\u2013728","journal-title":"Nature"},{"key":"512_CR58","first-page":"1","volume":"7","author":"M Moutoussis","year":"2011","unstructured":"Moutoussis M, Bentall RP, El-Deredy W, Dayan P (2011) Bayesian modelling of Jumping-to-conclusions bias in delusional patients. Cogn Neuropsychiatry 7: 1\u201326","journal-title":"Cogn Neuropsychiatry"},{"issue":"10","key":"512_CR59","doi-asserted-by":"crossref","first-page":"e1002221","DOI":"10.1371\/journal.pcbi.1002221","volume":"7","author":"J Namikawa","year":"2011","unstructured":"Namikawa J, Nishimoto R, Tani J (2011) A neurodynamic account of spontaneous behaviour. PLoS Comput Biol. 7(10): e1002221","journal-title":"PLoS Comput Biol."},{"key":"512_CR60","volume-title":"Learning in graphical models","author":"RM Neal","year":"1998","unstructured":"Neal RM, Hinton GE (1998) A view of the EM algorithm that justifies incremental sparse and other variants. In: Jordan M (ed) Learning in graphical models. Kluwer Academic, Dordrecht"},{"key":"512_CR61","unstructured":"Oliehoek F, Spaan MTJ, Vlassis N (2005) Best-response play in partially observable card games. In: Proceedings of the 14th Annual Machine Learning Conference of Belgium and the Netherlands"},{"key":"512_CR62","volume-title":"Probabilistic reasoning in intelligent systems: networks of plausible inference","author":"J Pearl","year":"1988","unstructured":"Pearl J (1988) Probabilistic reasoning in intelligent systems: networks of plausible inference. Morgan Kaufmann, San Fransisco"},{"key":"512_CR63","first-page":"146","volume":"4","author":"RP Rao","year":"2010","unstructured":"Rao RP (2010) Decision making under uncertainty: a neural model based on partially observable markov decision processes. Front Comput Neurosci 4: 146","journal-title":"Front Comput Neurosci"},{"issue":"1","key":"512_CR64","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1038\/4580","volume":"2","author":"RP Rao","year":"1999","unstructured":"Rao RP, Ballard DH (1999) Predictive coding in the visual cortex: a functional interpretation of some extra-classical receptive-field effects. Nat Neurosci 2(1): 79\u201387","journal-title":"Nat Neurosci"},{"key":"512_CR65","unstructured":"Rawlik K, Toussaint M, Vijayakumar S (2010) Approximate inference and stochastic optimal control. arXiv:1009.3958"},{"key":"512_CR66","volume-title":"Classical conditioning II: current research and theory.","author":"RA Rescorla","year":"1972","unstructured":"Rescorla RA, Wagner AR (1972) A theory of Pavlovian conditioning: variations in the effectiveness of reinforcement and nonreinforcement. In: Black A, Prokasy W (eds) Classical conditioning II: current research and theory. Appleton Century Crofts, New York"},{"key":"512_CR67","unstructured":"Robert C (1992) L\u2019analyse statistique Bayesienne. In: Economica. Paris, France"},{"key":"512_CR68","doi-asserted-by":"crossref","first-page":"589","DOI":"10.1287\/opre.36.4.589","volume":"36","author":"RD Shachter","year":"1988","unstructured":"Shachter RD (1988) Probabilistic inference and influence diagrams. Operat Res 36: 589\u2013605","journal-title":"Operat Res"},{"key":"512_CR69","unstructured":"Silver D, Veness J (2010) Monte-Carlo planning in large POMDPs. In: Proceedings of the Conference on neural information processing systems"},{"issue":"2","key":"512_CR70","doi-asserted-by":"crossref","first-page":"135","DOI":"10.1037\/0033-295X.88.2.135","volume":"88","author":"RS Sutton","year":"1981","unstructured":"Sutton RS, Barto AG (1981) Toward a modern theory of adaptive networks: expectation and prediction. Psychol Rev 88(2): 135\u2013170","journal-title":"Psychol Rev"},{"issue":"1","key":"512_CR71","doi-asserted-by":"crossref","first-page":"11","DOI":"10.1016\/S0893-6080(02)00214-9","volume":"16","author":"J Tani","year":"2003","unstructured":"Tani J (2003) Learning to generate articulated behavior through the bottom-up and the top-down interaction processes. Neural Netw 16(1): 11\u201323","journal-title":"Neural Netw"},{"key":"512_CR72","first-page":"3137","volume":"11","author":"E Theodorou","year":"2010","unstructured":"Theodorou E, Buchli J, Schaal S (2010) A generalized path integral control approach to reinforcement learning. J Mach Learn Res 11: 3137\u20133181","journal-title":"J Mach Learn Res"},{"key":"512_CR73","doi-asserted-by":"crossref","unstructured":"Todorov E (2006) Linearly-solvable Markov decision problems. In: Advances in neural information processing systems. MIT Press, Boston","DOI":"10.7551\/mitpress\/7503.003.0176"},{"key":"512_CR74","doi-asserted-by":"crossref","unstructured":"Todorov E (2008) General duality between optimal control and estimation. In: IEEE Conference on decision and control","DOI":"10.1109\/CDC.2008.4739438"},{"key":"512_CR75","unstructured":"Toussaint M, Charlin L, Poupart P (2008) Hierarchical POMDP controller optimization by likelihood maximization. In: Uncertainty in artificial intelligence (UAI 2008), AUAI Press, Menlo Park"},{"key":"512_CR76","doi-asserted-by":"crossref","unstructured":"Toussaint M, Storkey A (2006) Probabilistic inference for solving discrete and continuous state Markov decision processes. In: Proceedings of the 23nd International Conference on machine learning","DOI":"10.1145\/1143844.1143963"},{"issue":"1","key":"512_CR77","first-page":"95","volume":"32","author":"B Broek van den","year":"2008","unstructured":"van den Broek B, Wiegerinck W, Kappen B (2008) Graphical model inference in optimal control of stochastic multi-agent systems. J Artif Int Res 32(1): 95\u2013122","journal-title":"J Artif Int Res"},{"key":"512_CR78","first-page":"279","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins CJ, Dayan P (1992) Q-learning. Mach Learn 8: 279\u2013292","journal-title":"Mach Learn"},{"key":"512_CR79","first-page":"229","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams RJ (1992) Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach Learn 8: 229\u2013256","journal-title":"Mach Learn"},{"issue":"4","key":"512_CR80","doi-asserted-by":"crossref","first-page":"475","DOI":"10.1111\/0824-7935.00073","volume":"14","author":"NL Zhang","year":"1998","unstructured":"Zhang NL (1998) Probabilistic inference in influence diagrams. Comput Intell 14(4): 475\u2013497","journal-title":"Comput Intell"}],"container-title":["Biological Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/www.springerlink.com\/index\/pdf\/10.1007\/s00422-012-0512-8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,28]],"date-time":"2024-04-28T02:54:07Z","timestamp":1714272847000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00422-012-0512-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,8,3]]},"references-count":80,"journal-issue":{"issue":"8-9","published-print":{"date-parts":[[2012,10]]}},"alternative-id":["512"],"URL":"https:\/\/doi.org\/10.1007\/s00422-012-0512-8","relation":{},"ISSN":["0340-1200","1432-0770"],"issn-type":[{"value":"0340-1200","type":"print"},{"value":"1432-0770","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,8,3]]}}}