{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T01:36:07Z","timestamp":1742952967350,"version":"3.40.3"},"publisher-location":"Cham","reference-count":61,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031287183"},{"type":"electronic","value":"9783031287190"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-28719-0_25","type":"book-chapter","created":{"date-parts":[[2023,3,21]],"date-time":"2023-03-21T18:03:51Z","timestamp":1679421831000},"page":"355-370","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Deriving Time-Averaged Active Inference from\u00a0Control Principles"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7014-8471","authenticated-orcid":false,"given":"Eli","family":"Sennesh","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4680-0172","authenticated-orcid":false,"given":"Jordan","family":"Theriault","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9465-5398","authenticated-orcid":false,"given":"Jan-Willem","family":"van de Meent","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4478-2051","authenticated-orcid":false,"given":"Lisa Feldman","family":"Barrett","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8844-990X","authenticated-orcid":false,"given":"Karen","family":"Quigley","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,3,22]]},"reference":[{"issue":"3","key":"25_CR1","doi-asserted-by":"publisher","first-page":"611","DOI":"10.1007\/S00429-012-0475-5","volume":"218","author":"RA Adams","year":"2013","unstructured":"Adams, R.A., Shipp, S., Friston, K.J.: Predictions not commands: active inference in the motor system. Brain Struct. Funct. 218(3), 611\u2013643 (2013). https:\/\/doi.org\/10.1007\/S00429-012-0475-5","journal-title":"Brain Struct. Funct."},{"issue":"6","key":"25_CR2","doi-asserted-by":"publisher","first-page":"1511","DOI":"10.1162\/neco.2010.08-09-1080","volume":"22","author":"WH Alexander","year":"2010","unstructured":"Alexander, W.H., Brown, J.W.: Hyperbolically discounted temporal difference learning. Neural Comput. 22(6), 1511\u20131527 (2010). https:\/\/doi.org\/10.1162\/neco.2010.08-09-1080","journal-title":"Neural Comput."},{"key":"25_CR3","doi-asserted-by":"publisher","unstructured":"Barrett, L.F., Simmons, W.K.: Interoceptive predictions in the brain. Nature Rev. Neurosci. 16(7), 419\u2013429 (2015). https:\/\/doi.org\/10.1038\/nrn3950. https:\/\/www.nature.com\/articles\/nrn3950","DOI":"10.1038\/nrn3950"},{"issue":"4","key":"25_CR4","doi-asserted-by":"publisher","first-page":"695","DOI":"10.1016\/j.neuron.2012.10.038","volume":"76","author":"AM Bastos","year":"2012","unstructured":"Bastos, A.M., Usrey, W.M., Adams, R.A., Mangun, G.R., Fries, P., Friston, K.J.: Canonical microcircuits for predictive coding. Neuron 76(4), 695\u2013711 (2012)","journal-title":"Neuron"},{"key":"25_CR5","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1613\/jair.806","volume":"15","author":"J Baxter","year":"2001","unstructured":"Baxter, J., Bartlett, P.L.: Infinite-horizon policy-gradient estimation. J. Artif. Intell. Res. 15, 319\u2013350 (2001). https:\/\/doi.org\/10.1613\/jair.806","journal-title":"J. Artif. Intell. Res."},{"key":"25_CR6","doi-asserted-by":"publisher","first-page":"198","DOI":"10.1016\/j.jmp.2015.11.003","volume":"76","author":"R Bogacz","year":"2017","unstructured":"Bogacz, R.: A tutorial on the free-energy framework for modelling perception and learning. J. Math. Psychol. 76, 198\u2013211 (2017)","journal-title":"J. Math. Psychol."},{"key":"25_CR7","doi-asserted-by":"publisher","unstructured":"Camacho, A., Icarte, R.T., Klassen, T.Q., Valenzano, R., McIlraith, S.A.: LTL and beyond: formal languages for reward function specification in reinforcement learning. In: IJCAI International Joint Conference on Artificial Intelligence, vol. 19, pp. 6065\u20136073 (2019). https:\/\/doi.org\/10.24963\/ijcai.2019\/840","DOI":"10.24963\/ijcai.2019\/840"},{"issue":"4","key":"25_CR8","doi-asserted-by":"publisher","first-page":"180","DOI":"10.1152\/advan.00012.2004","volume":"28","author":"R Carpenter","year":"2004","unstructured":"Carpenter, R.: Homeostasis: a plea for a unified approach. Adv. Physiol. Educ. 28(4), 180\u2013187 (2004)","journal-title":"Adv. Physiol. Educ."},{"key":"25_CR9","doi-asserted-by":"publisher","first-page":"269","DOI":"10.1146\/annurev.neuro.051508.135409","volume":"33","author":"P Cisek","year":"2010","unstructured":"Cisek, P., Kalaska, J.F.: Neural mechanisms for interacting with a world full of action choices. Annu. Rev. Neurosci. 33, 269\u2013298 (2010). https:\/\/doi.org\/10.1146\/annurev.neuro.051508.135409","journal-title":"Annu. Rev. Neurosci."},{"key":"25_CR10","doi-asserted-by":"crossref","unstructured":"Corcoran, A.W., Hohwy, J.: Allostasis, interoception, and the free energy principle: feeling our way forward. In: The Interoceptive Mind: From homeostasis to awareness, pp. 272\u2013292. Oxford University Press (2019)","DOI":"10.1093\/oso\/9780198811930.003.0015"},{"key":"25_CR11","doi-asserted-by":"publisher","first-page":"102447","DOI":"10.1016\/j.jmp.2020.102447","volume":"99","author":"L Da Costa","year":"2020","unstructured":"Da Costa, L., Parr, T., Sajid, N., Veselic, S., Neacsu, V., Friston, K.: Active inference on discrete state-spaces: a synthesis. J. Math. Psychol. 99, 102447 (2020)","journal-title":"J. Math. Psychol."},{"key":"25_CR12","doi-asserted-by":"publisher","first-page":"679","DOI":"10.1016\/S0925-2312(00)00232-0","volume":"32","author":"ND Daw","year":"2000","unstructured":"Daw, N.D., Touretzky, D.S.: Behavioral considerations suggest an average reward td model of the dopamine system. Neurocomputing 32, 679\u2013684 (2000)","journal-title":"Neurocomputing"},{"issue":"4","key":"25_CR13","doi-asserted-by":"publisher","first-page":"292","DOI":"10.1038\/nrn2258","volume":"9","author":"AA Faisal","year":"2008","unstructured":"Faisal, A.A., Selen, L.P., Wolpert, D.M.: Noise in the nervous system. Nat. Rev. Neurosci. 9(4), 292\u2013303 (2008)","journal-title":"Nat. Rev. Neurosci."},{"issue":"1","key":"25_CR14","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1080\/00222895.1986.10735369","volume":"18","author":"AG Feldman","year":"1986","unstructured":"Feldman, A.G.: Once more on the equilibrium-point hypothesis ($$\\lambda $$ model) for motor control. J. Mot. Behav. 18(1), 17\u201354 (1986). https:\/\/doi.org\/10.1080\/00222895.1986.10735369","journal-title":"J. Mot. Behav."},{"key":"25_CR15","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4939-2736-4","volume-title":"Referent Control of Action and Perception","author":"Anatol G. Feldman","year":"2015","unstructured":"Feldman, Anatol G..: Referent Control of Action and Perception. Springer, New York (2015). https:\/\/doi.org\/10.1007\/978-1-4939-2736-4"},{"issue":"2","key":"25_CR16","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1038\/nrn2787","volume":"11","author":"K Friston","year":"2010","unstructured":"Friston, K.: The free-energy principle: a unified brain theory? Nat. Rev. Neurosci. 11(2), 127\u2013138 (2010)","journal-title":"Nat. Rev. Neurosci."},{"issue":"1","key":"25_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1162\/NECO_a_00912","volume":"29","author":"K Friston","year":"2017","unstructured":"Friston, K., FitzGerald, T., Rigoli, F., Schwartenbeck, P., Pezzulo, G.: Active inference: a process theory. Neural Comput. 29(1), 1\u201349 (2017)","journal-title":"Neural Comput."},{"issue":"8\u20139","key":"25_CR18","doi-asserted-by":"publisher","first-page":"523","DOI":"10.1007\/s00422-012-0512-8","volume":"106","author":"K Friston","year":"2012","unstructured":"Friston, K., Samothrakis, S., Montague, R.: Active inference and agency: optimal control without cost functions. Biol. Cybern. 106(8\u20139), 523\u2013541 (2012). https:\/\/doi.org\/10.1007\/s00422-012-0512-8","journal-title":"Biol. Cybern."},{"key":"25_CR19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1155\/2010\/621670","volume":"2010","author":"K Friston","year":"2010","unstructured":"Friston, K., Stephan, K., Li, B., Daunizeau, J.: Generalised filtering. Math. Prob. Eng. 2010, 1\u201335 (2010)","journal-title":"Math. Prob. Eng."},{"issue":"7","key":"25_CR20","doi-asserted-by":"publisher","first-page":"e6421","DOI":"10.1371\/journal.pone.0006421","volume":"4","author":"KJ Friston","year":"2009","unstructured":"Friston, K.J., Daunizeau, J., Kiebel, S.J.: Reinforcement learning or active inference? PLoS ONE 4(7), e6421 (2009)","journal-title":"PLoS ONE"},{"issue":"3","key":"25_CR21","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1007\/s00422-010-0364-z","volume":"102","author":"KJ Friston","year":"2010","unstructured":"Friston, K.J., Daunizeau, J., Kilner, J., Kiebel, S.J.: Action and behavior: a free-energy formulation. Biol. Cybern. 102(3), 227\u2013260 (2010). https:\/\/doi.org\/10.1007\/s00422-010-0364-z","journal-title":"Biol. Cybern."},{"key":"25_CR22","doi-asserted-by":"publisher","unstructured":"Friston, K.J., Rosch, R., Parr, T., Price, C., Bowman, H.: Deep temporal models and active inference. Neurosci. Biobehav. Rev. 77(April), 388\u2013402 (2017). https:\/\/doi.org\/10.1016\/j.neubiorev.2017.04.009. citation Key: Friston 2017","DOI":"10.1016\/j.neubiorev.2017.04.009"},{"issue":"9","key":"25_CR23","doi-asserted-by":"publisher","first-page":"519","DOI":"10.1038\/s41583-018-0045-9","volume":"19","author":"JP Gallivan","year":"2018","unstructured":"Gallivan, J.P., Chapman, C.S., Wolpert, D.M., Flanagan, J.R.: Decision-making in sensorimotor control. Nat. Rev. Neurosci. 19(9), 519\u2013534 (2018)","journal-title":"Nat. Rev. Neurosci."},{"key":"25_CR24","doi-asserted-by":"crossref","unstructured":"Howard, M.W.: Formal models of memory based on temporally-varying representations. In: The New Handbook of Mathematical Psychology, vol. 3. Cambridge University Press (2022)","DOI":"10.1017\/9781108902724.006"},{"key":"25_CR25","unstructured":"Icarte, R.T., Klassen, T.Q., Valenzano, R., McIlraith, S.A.: Using reward machines for high-level task specification and decomposition in reinforcement learning. In: 35th International Conference on Machine Learning, ICML 2018, vol. 5, pp. 3347\u20133358 (2018)"},{"key":"25_CR26","unstructured":"Jahromi, M.J., Jain, R., Nayyar, A.: Online learning for unknown partially observable mdps. In: Proceedings of the 25th International Conference on Artificial Intelligence and Statistics (AISTATS). Proceedings of Machine Learning Research, Valencia, Spain, vol. 151, p. 21 (2022)"},{"issue":"11","key":"25_CR27","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1371\/journal.pcbi.1000209","volume":"4","author":"SJ Kiebel","year":"2008","unstructured":"Kiebel, S.J., Daunizeau, J., Friston, K.J.: A hierarchy of time-scales and the brain. PLOS Comput. Bio. 4(11), 1\u201312 (2008). https:\/\/doi.org\/10.1371\/journal.pcbi.1000209","journal-title":"PLOS Comput. Bio."},{"key":"25_CR28","unstructured":"Lanillos, P., et al.: Active inference in robotics and artificial agents: survey and challenges. (arXiv:2112.01871), https:\/\/arxiv.org\/abs\/2112.01871 [cs] (2021)"},{"issue":"3","key":"25_CR29","doi-asserted-by":"publisher","first-page":"294","DOI":"10.1123\/mcj.14.3.294","volume":"14","author":"ML Latash","year":"2010","unstructured":"Latash, M.L.: Motor synergies and the equilibrium-point hypothesis. Mot. Control 14(3), 294\u2013322 (2010). https:\/\/doi.org\/10.1123\/mcj.14.3.294","journal-title":"Mot. Control"},{"key":"25_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/C2018-0-04663-0","author":"ML Latash","year":"2019","unstructured":"Latash, M.L.: Physics of biological action and perception. Academic Press (2019). https:\/\/doi.org\/10.1016\/C2018-0-04663-0","journal-title":"Academic Press"},{"issue":"6","key":"25_CR31","doi-asserted-by":"publisher","first-page":"1094","DOI":"10.1016\/j.neuron.2019.12.027","volume":"105","author":"Y Livneh","year":"2020","unstructured":"Livneh, Y., et al.: Estimation of current and future physiological states in insular cortex. Neuron 105(6), 1094-1111.e10 (2020). https:\/\/doi.org\/10.1016\/j.neuron.2019.12.027","journal-title":"Neuron"},{"issue":"13","key":"25_CR32","doi-asserted-by":"publisher","first-page":"1707","DOI":"10.1016\/j.cub.2015.05.038","volume":"25","author":"SG Manohar","year":"2015","unstructured":"Manohar, S.G., et al.: Reward pays the cost of noise reduction in motor and cognitive control. Curr. Biol. 25(13), 1707\u20131716 (2015)","journal-title":"Curr. Biol."},{"issue":"1","key":"25_CR33","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41467-019-13239-6","volume":"10","author":"J Merel","year":"2019","unstructured":"Merel, J., Botvinick, M., Wayne, G.: Hierarchical motor control in mammals and machines. Nat. Commun. 10(1), 1\u201312 (2019). https:\/\/doi.org\/10.1038\/s41467-019-13239-6","journal-title":"Nat. Commun."},{"issue":"10","key":"25_CR34","doi-asserted-by":"publisher","first-page":"1945","DOI":"10.1162\/neco_a_01219","volume":"31","author":"BA Mitchell","year":"2019","unstructured":"Mitchell, B.A., et al.: A minimum free energy model of motor learning. Neural Comput. 31(10), 1945\u20131963 (2019)","journal-title":"Neural Comput."},{"key":"25_CR35","doi-asserted-by":"crossref","unstructured":"Morville, T., Friston, K., Burdakov, D., Siebner, H.R., Hulme, O.J.: The homeostatic logic of reward. bioRxiv, p. 242974 (2018)","DOI":"10.1101\/242974"},{"key":"25_CR36","volume-title":"Rheostasis: The Physiology of Change","author":"N Mrosovsky","year":"1990","unstructured":"Mrosovsky, N.: Rheostasis: The Physiology of Change. Oxford University Press, Oxford (1990)"},{"key":"25_CR37","unstructured":"Nasiriany, S., Lin, S., Levine, S.: Planning with goal-conditioned policies. In: Advances in Neural Information Processing Systems. No. NeurIPS (2019)"},{"key":"25_CR38","doi-asserted-by":"crossref","unstructured":"Nasiriany, S., Pong, V.H., Nair, A., Khazatsky, A., Berseth, G., Levine, S.: DisCo RL: distribution-conditioned reinforcement learning for general-purpose policies. In: IEEE International Conference on Robotics and Automation (2021). https:\/\/arxiv.org\/abs\/2104.11707","DOI":"10.1109\/ICRA48506.2021.9561402"},{"key":"25_CR39","doi-asserted-by":"publisher","unstructured":"Pan, Y., Theodorou, E.A.: Nonparametric infinite horizon Kullback-Leibler stochastic control. In: IEEE SSCI 2014 IEEE Symposium Series on Computational Intelligence - ADPRL 2014: 2014 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning, Proceedings, vol. 2(2) (2014). https:\/\/doi.org\/10.1109\/ADPRL.2014.7010616","DOI":"10.1109\/ADPRL.2014.7010616"},{"issue":"6","key":"25_CR40","doi-asserted-by":"publisher","first-page":"414","DOI":"10.1016\/j.tics.2016.03.013","volume":"20","author":"G Pezzulo","year":"2016","unstructured":"Pezzulo, G., Cisek, P.: Navigating the affordance landscape: feedback control as a process model of behavior and cognition. Trends Cogn. Sci. 20(6), 414\u2013424 (2016). https:\/\/doi.org\/10.1016\/j.tics.2016.03.013","journal-title":"Trends Cogn. Sci."},{"key":"25_CR41","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1016\/j.pneurobio.2015.09.001","volume":"134","author":"G Pezzulo","year":"2015","unstructured":"Pezzulo, G., Rigoli, F., Friston, K.: Active inference, homeostatic regulation and adaptive behavioural control. Prog. Neurobiol. 134, 17\u201335 (2015)","journal-title":"Prog. Neurobiol."},{"issue":"4","key":"25_CR42","doi-asserted-by":"publisher","first-page":"294","DOI":"10.1016\/j.tics.2018.01.009","volume":"22","author":"G Pezzulo","year":"2018","unstructured":"Pezzulo, G., Rigoli, F., Friston, K.J.: Hierarchical active inference: a theory of motivated control. Trends Cogn. Sci. 22(4), 294\u2013306 (2018). https:\/\/doi.org\/10.1016\/j.tics.2018.01.009","journal-title":"Trends Cogn. Sci."},{"issue":"1","key":"25_CR43","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41467-021-25123-3","volume":"12","author":"P Piray","year":"2021","unstructured":"Piray, P., Daw, N.D.: Linear reinforcement learning in planning, grid fields, and cognitive control. Nat. Commun. 12(1), 1\u201320 (2021)","journal-title":"Nat. Commun."},{"issue":"1","key":"25_CR44","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1016\/j.tins.2020.09.008","volume":"44","author":"KS Quigley","year":"2021","unstructured":"Quigley, K.S., Kanoski, S., Grill, W.M., Barrett, L.F., Tsakiris, M.: Functions of interoception: from energy regulation to experience of the self. Trends in Neurosci. 44(1), 29\u201338 (2021). https:\/\/doi.org\/10.1016\/j.tins.2020.09.008","journal-title":"Trends in Neurosci."},{"key":"25_CR45","unstructured":"Ramstead, M.J., et al.: On Bayesian mechanics: a physics of and by beliefs. arXiv preprint arXiv:2205.11543 (2022)"},{"key":"25_CR46","unstructured":"Ringstrom, T.J., Hasanbeig, M., Abate, A.: Jump operator planning: Goal-conditioned policy ensembles and zero-shot transfer. arXiv preprint arXiv:2007.02527 (2020)"},{"issue":"3","key":"25_CR47","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1007\/s002210050738","volume":"126","author":"JP Scholz","year":"1999","unstructured":"Scholz, J.P., Sch\u00f6ner, G.: The uncontrolled manifold concept: identifying control variables for a functional task. Exp. Brain Res. 126(3), 289\u2013306 (1999)","journal-title":"Exp. Brain Res."},{"issue":"10","key":"25_CR48","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1016\/j.tins.2019.07.010","volume":"42","author":"J Schulkin","year":"2019","unstructured":"Schulkin, J., Sterling, P.: Allostasis: a brain-centered, predictive mode of physiological regulation. Trends Neurosci. 42(10), 740\u2013752 (2019)","journal-title":"Trends Neurosci."},{"key":"25_CR49","doi-asserted-by":"publisher","first-page":"108242","DOI":"10.1016\/j.biopsycho.2021.108242","volume":"167","author":"E Sennesh","year":"2021","unstructured":"Sennesh, E., Theriault, J., Brooks, D., van de Meent, J.W., Barrett, L.F., Quigley, K.S.: Interoception as modeling, allostasis as control. Biol. Psychol. 167, 108242 (2021)","journal-title":"Biol. Psychol."},{"key":"25_CR50","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/12940.001.0001","volume-title":"Vigor: Neuroeconomics of movement control","author":"R Shadmehr","year":"2020","unstructured":"Shadmehr, R., Ahmed, A.A.: Vigor: Neuroeconomics of movement control. MIT Press, Cambridge (2020)"},{"issue":"1","key":"25_CR51","doi-asserted-by":"publisher","first-page":"134","DOI":"10.1162\/NECO_a_00212","volume":"24","author":"KH Shankar","year":"2012","unstructured":"Shankar, K.H., Howard, M.W.: A scale-invariant internal representation of time. Neural Comput. 24(1), 134\u2013193 (2012)","journal-title":"Neural Comput."},{"issue":"2","key":"25_CR52","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1007\/s11229-022-03480-w","volume":"200","author":"R Smith","year":"2022","unstructured":"Smith, R., Ramstead, M.J., Kiefer, A.: Active inference models do not contradict folk psychology. Synthese 200(2), 81 (2022). https:\/\/doi.org\/10.1007\/s11229-022-03480-w","journal-title":"Synthese"},{"key":"25_CR53","doi-asserted-by":"publisher","DOI":"10.2307\/j.ctvs32s6b","volume-title":"Foraging Theory","author":"DW Stephens","year":"2019","unstructured":"Stephens, D.W., Krebs, J.R.: Foraging Theory. Princeton University Press, Princeton (2019)"},{"issue":"1","key":"25_CR54","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1016\/j.physbeh.2011.06.004","volume":"106","author":"P Sterling","year":"2012","unstructured":"Sterling, P.: Allostasis: a model of predictive regulation. Physiol. Behav. 106(1), 5\u201315 (2012)","journal-title":"Physiol. Behav."},{"issue":"1\u20132","key":"25_CR55","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1016\/s0004-3702(98)00002-2","volume":"100","author":"P Tadepalli","year":"1998","unstructured":"Tadepalli, P., Ok, D.K.: Model-based average reward reinforcement learning. Artif. Intell. 100(1\u20132), 177\u2013224 (1998). https:\/\/doi.org\/10.1016\/s0004-3702(98)00002-2","journal-title":"Artif. Intell."},{"key":"25_CR56","unstructured":"Tang, Y., Kucukelbir, A.: Hindsight expectation maximization for goal-conditioned reinforcement learning. In: Proceedings of the 24th International Conference on Artificial Intelligence and Statistics (AISTATS), vol. 130 (2021). https:\/\/arxiv.org\/abs\/2006.07549"},{"key":"25_CR57","doi-asserted-by":"crossref","unstructured":"Theodorou, E.: Relative entropy and free energy dualities: connections to path integral and kl control. In: 2012 IEEE 51st IEEE Conference, pp. 1466\u20131473 (2012)","DOI":"10.1109\/CDC.2012.6426381"},{"issue":"3","key":"25_CR58","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1103\/PhysRevE.91.032104","volume":"91","author":"S Thijssen","year":"2015","unstructured":"Thijssen, S., Kappen, H.J.: Path integral control and state-dependent feedback. Phys. Rev. E Stat. Nonlinear Soft Matter Phys. 91(3), 1\u20137 (2015). https:\/\/doi.org\/10.1103\/PhysRevE.91.032104","journal-title":"Phys. Rev. E Stat. Nonlinear Soft Matter Phys."},{"issue":"28","key":"25_CR59","doi-asserted-by":"publisher","first-page":"11478","DOI":"10.1073\/pnas.0710743106","volume":"106","author":"E Todorov","year":"2009","unstructured":"Todorov, E.: Efficient computation of optimal actions. Proc. Natl. Acad. Sci. U.S.A. 106(28), 11478\u201311483 (2009). https:\/\/doi.org\/10.1073\/pnas.0710743106","journal-title":"Proc. Natl. Acad. Sci. U.S.A."},{"key":"25_CR60","doi-asserted-by":"publisher","unstructured":"Tschantz, A., Barca, L., Maisto, D., Buckley, C.L., Seth, A.K., Pezzulo, G.: Simulating homeostatic, allostatic and goal-directed forms of interoceptive control using active inference. Biol. Psychol. 169, 108266 (2022). https:\/\/doi.org\/10.1016\/j.biopsycho.2022.108266, https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0301051122000084","DOI":"10.1016\/j.biopsycho.2022.108266"},{"key":"25_CR61","unstructured":"Zhang, Y., Ross, K.W.: On-policy deep reinforcement learning for the average-reward criterion. In: Proceedings of the 38th International Conference on Machine Learning, p. 11 (2021)"}],"container-title":["Communications in Computer and Information Science","Active Inference"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-28719-0_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,9]],"date-time":"2023-12-09T08:51:28Z","timestamp":1702111888000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-28719-0_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031287183","9783031287190"],"references-count":61,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-28719-0_25","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"22 March 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IWAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Active Inference","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Grenoble","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"France","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iwai-ws2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iwaiworkshop.github.io\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}