{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T18:40:02Z","timestamp":1759171202099,"version":"3.44.0"},"reference-count":31,"publisher":"Open Publishing Association","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Electron. Proc. Theor. Comput. Sci.","EPTCS"],"DOI":"10.4204\/eptcs.429.15","type":"journal-article","created":{"date-parts":[[2025,9,22]],"date-time":"2025-09-22T19:20:49Z","timestamp":1758568849000},"page":"270-286","source":"Crossref","is-referenced-by-count":0,"title":["Reinforcement Learning in Categorical Cybernetics"],"prefix":"10.4204","volume":"429","author":[{"given":"Jules","family":"Hedges","sequence":"first","affiliation":[{"name":"University of Strathclyde"}]},{"given":"Riu","family":"Rodr\u00edguez Sakamoto","sequence":"additional","affiliation":[{"name":"University of Strathclyde"}]}],"member":"2720","published-online":{"date-parts":[[2025,9,25]]},"reference":[{"key":"categories_containers","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-36576-1_2","article-title":"Categories of containers","volume-title":"Proceedings of FoSSACS 2003","author":"Abbott","year":"2003"},{"issue":"3","key":"parallel_computation","doi-asserted-by":"publisher","first-page":"610","DOI":"10.1109\/tac.1982.1102980","article-title":"Distributed dynamic programming","volume":"27","author":"Bertsekas","year":"1982","journal-title":"IEEE Transactions on Automatic Control"},{"key":"bertsekas_book","series-title":"Athena Scientific optimization and computation series","volume-title":"Reinforcement Learning and Optimal Control","author":"Bertsekas","year":"2019"},{"key":"compositional-bayesian","series-title":"Leibniz Proceedings in Informatics","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2305.06112","article-title":"The compositional structure of Bayesian inference","volume-title":"Proceedings of Mathematical Foundations of Computer Science 2023","volume":"272","author":"Braithwaite","year":"2023"},{"issue":"9","key":"bayesian_open_games","doi-asserted-by":"publisher","DOI":"10.32408\/compositionality-5-9","article-title":"Bayesian open games","volume":"5","author":"Bolt","year":"2023","journal-title":"Compositionality"},{"issue":"1","key":"botta_etal_sequential_decision_problems","doi-asserted-by":"publisher","DOI":"10.23638\/LMCS-13(1:7)2017","article-title":"Sequential decision problems, dependent types and generic solutions","volume":"13","author":"Botta","year":"2017","journal-title":"Logical Methods in Computer Science"},{"issue":"1","key":"categorical_update","doi-asserted-by":"publisher","DOI":"10.32408\/compositionality-6-1","article-title":"Profunctor optics, a categorical update","volume":"6","author":"Clarke","year":"2024","journal-title":"Compositionality"},{"key":"foundations-gradient-learning","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-99336-8_1","article-title":"Categorical foundations of gradient-based learning","volume-title":"Proceedings of ESOP 2022","volume":"13240","author":"Cruttwell","year":"2022"},{"key":"towards-foundations","series-title":"Electronic Proceedings in Theoretical Computer Science","doi-asserted-by":"publisher","DOI":"10.4204\/EPTCS.372.17","article-title":"Towards foundations of categorical cybernetics","volume-title":"Proceedings of Applied Category Theory 2021","volume":"372","author":"Capucci","year":"2022"},{"key":"delpeuch_free_automization","series-title":"Electronic Proceedings in Theoretical Computer Science","doi-asserted-by":"publisher","DOI":"10.4204\/EPTCS.323.3","article-title":"Autonomization of Monoidal Categories","volume-title":"Proceedings of Applied Category Theory 2019","author":"Delpeuch","year":"2020"},{"key":"representable_markov_categories","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2023.113896","article-title":"Representable Markov categories and comparison of statistical experiments in categorical probability","volume":"961","author":"Fritz","year":"2023","journal-title":"Theoretical computer science"},{"issue":"7","key":"fritz_perrone_probability_monad_colimit","doi-asserted-by":"publisher","first-page":"170","DOI":"10.48550\/arXiv.1712.05363","article-title":"A probability monad as the colimit of spaces of finite samples","volume":"34","author":"Fritz","year":"2019","journal-title":"Theory and applications of categories"},{"key":"Synthetic_approach","doi-asserted-by":"publisher","DOI":"10.1016\/j.aim.2020.107239","article-title":"A synthetic approach to Markov kernels, conditional independence and theorems on sufficient statistics","volume":"370","author":"Fritz","year":"2020","journal-title":"Advances in Mathematics"},{"volume-title":"Fundamental components of deep learning: A category-theoretic approach","year":"2024","author":"Gavranovi\\'c","key":"bruno_thesis"},{"key":"ghrist-persistent-topology","doi-asserted-by":"publisher","DOI":"10.1090\/S0273-0979-07-01191-3","article-title":"Barcodes: The persistent topology of data","volume":"45","author":"Ghrist","year":"2008","journal-title":"Bulletin of the American Mathematical Society"},{"key":"compositional-game-theory","doi-asserted-by":"publisher","DOI":"10.1145\/3209108.3209165","article-title":"Compositional game theory","volume-title":"Proceedings of Logic in Computer Science 2018","author":"Ghani","year":"2018"},{"key":"game_semantics_game_theory","series-title":"Outstanding contributions to logic","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-24117-8_11","article-title":"The game semantics of game theory","volume-title":"Samson Abramsky on Logic and Structure in Computer Science and Beyond","volume":"25","author":"Hedges","year":"2023"},{"key":"value-iteration-optic-composition","series-title":"Electronic Proceedings in Theoretical Computer Science","doi-asserted-by":"publisher","DOI":"10.4204\/EPTCS.380.24","article-title":"Value iteration is optic composition","volume-title":"Proceedings of Applied Category Theory 2022","volume":"380","author":"Hedges","year":"2023"},{"key":"hermida_tennent_monoidal_indeterminates","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2012.01.001","article-title":"Monoidal indeterminates and categories of possible worlds","volume":"430","author":"Hermida","year":"2012","journal-title":"Theoretical computer science"},{"key":"kaelbling_survey","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1613\/jair.301","article-title":"Reinforcement Learning: A Survey","volume":"4","author":"Kaelbling","year":"1996","journal-title":"Journal of Artificial Intelligence Research"},{"key":"actor_critic","article-title":"Actor-Critic Algorithms","volume":"42","author":"Konda","year":"2001","journal-title":"Society for Industrial and Applied Mathematics"},{"key":"monoidal-streams","doi-asserted-by":"publisher","DOI":"10.1145\/3531130.3533365","article-title":"Monoidal streams for dataflow programming","volume-title":"Proceedings of Logic in Computer Science 2022","author":"di Lavore","year":"2022"},{"key":"fosco_book","doi-asserted-by":"publisher","DOI":"10.1017\/9781108778657","volume-title":"(Co)end calculus","author":"Loregian","year":"2021"},{"issue":"4","key":"pastro-street","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.0711.1859","article-title":"Doubles for monoidal categories","volume":"21","author":"Pastro","year":"2008","journal-title":"Theory and applications of categories"},{"volume-title":"Mathematical foundations for a compositional account of the Bayesian brain","year":"2023","author":"Smithe","key":"toby-thesis"},{"key":"reward_is_enough","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2021.103535","article-title":"Reward is enough","volume":"299","author":"Silver","year":"2021","journal-title":"Artificial Intelligence"},{"key":"alphago","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270","article-title":"Mastering the game of Go without human knowledge","volume":"550","author":"Silver","year":"2017","journal-title":"Nature"},{"key":"sarsa","series-title":"NIPS'95","doi-asserted-by":"publisher","first-page":"1038","DOI":"10.5555\/2998828.2998974","article-title":"Generalization in reinforcement learning: successful examples using sparse coarse coding","volume-title":"Proceedings of the 8th International Conference on Neural Information Processing Systems","author":"Sutton","year":"1995"},{"key":"alphastar","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z","article-title":"Grandmaster level in StarCraft II using multi-agent reinforcement learning","volume":"575","author":"Vinyals","year":"2019","journal-title":"Nature"},{"key":"vertechi_dependent_optics","doi-asserted-by":"publisher","DOI":"10.4204\/EPTCS.380.8","article-title":"Dependent optics","volume-title":"Proceedings of Applied Category Theory 2022","author":"Vertechi","year":"2023"},{"key":"q_learning","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/bf00992698","article-title":"Q-learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Machine Learning"}],"container-title":["Electronic Proceedings in Theoretical Computer Science"],"original-title":[],"language":"en","deposited":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T18:16:44Z","timestamp":1759169804000},"score":1,"resource":{"primary":{"URL":"http:\/\/arxiv.org\/abs\/2404.02688v2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,25]]},"references-count":31,"URL":"https:\/\/doi.org\/10.4204\/eptcs.429.15","relation":{},"ISSN":["2075-2180"],"issn-type":[{"type":"electronic","value":"2075-2180"}],"subject":[],"published":{"date-parts":[[2025,9,25]]}}}