{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T18:26:38Z","timestamp":1776277598410,"version":"3.50.1"},"reference-count":51,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/cdc40024.2019.9029255","type":"proceedings-article","created":{"date-parts":[[2020,3,13]],"date-time":"2020-03-13T04:43:11Z","timestamp":1584074591000},"page":"3677-3684","source":"Crossref","is-referenced-by-count":5,"title":["An Information-theoretic On-line Learning Principle for Specialization in Hierarchical Decision-Making Systems"],"prefix":"10.1109","author":[{"given":"Heinke","family":"Hihn","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sebastian","family":"Gottwald","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniel A.","family":"Braun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.3389\/fnins.2018.00932"},{"key":"ref33","article-title":"Information-theoretic bounded rationality","author":"ortega","year":"2015"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1098\/rspa.2012.0683"},{"key":"ref31","article-title":"Information-theoretic motor skill learning","author":"neumann","year":"2013","journal-title":"Proceedings of AAAI Workshop on Intelligent Robotic Systems"},{"key":"ref30","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/S0005-1098(00)00058-3"},{"key":"ref36","article-title":"Combining reinforcement learning with a local control algorithm","author":"randl\u00f8v","year":"2000","journal-title":"International Conference on Machine Learning"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202240"},{"key":"ref34","first-page":"2825","article-title":"Scikit-learn: Machine learning in Python","volume":"12","author":"pedregosa","year":"2011","journal-title":"Journal of Machine Learning Research"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0063400"},{"key":"ref27","article-title":"Reinforcement learning and control as probabilistic inference: Tutorial and review","author":"levine","year":"2018"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1006\/game.1995.1023"},{"key":"ref2","article-title":"Hierarchical, heterogeneous control of non-linear dynamical systems using reinforcement learning","author":"abramova","year":"2012","journal-title":"European Workshop On Reinforcement Learning at ICML"},{"key":"ref1","first-page":"265","article-title":"Tensorflow: a system for large-scale machine learning","volume":"16","author":"abadi","year":"2016","journal-title":"OSDI"},{"key":"ref20","article-title":"Soft q-learning with mutual-information regularization","author":"grau-moya","year":"2019","journal-title":"International Conference on Learning Representations"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1991.3.1.79"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-99978-4_17"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1002\/0471660264"},{"key":"ref23","first-page":"1008","article-title":"Actor-critic algorithms","author":"konda","year":"2000","journal-title":"Advances in neural information processing systems"},{"key":"ref26","article-title":"An information-theoretic optimality principle for deep reinforcement learning","author":"leibfried","year":"2017","journal-title":"NeurIPS 2018 Workshop on Deep Reinforcement Learning"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1162\/NECO_a_00758"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1007\/s10015-004-0340-6"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2012.2200299"},{"key":"ref10","article-title":"Openai gym","author":"brockman","year":"2016"},{"key":"ref11","first-page":"273","article-title":"Hierarchical relative entropy policy search","author":"daniel","year":"2012","journal-title":"Artificial Intelligence and Statistics"},{"key":"ref40","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"schulman","year":"2015","journal-title":"International Conference on Learning Representations"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"599","DOI":"10.1111\/cogs.12101","article-title":"One and done&#x0192; optimal decisions from very few samples","volume":"38","author":"edward","year":"2014","journal-title":"Cognitive Science"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2015.00027"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1126\/science.aac6076"},{"key":"ref15","article-title":"Divide-and-conquer reinforcement learning","author":"ghosh","year":"2017"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.3390\/e21040375"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1162\/neco_a_01153"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.3390\/e20010001"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46227-1_30"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.23919\/ACC.2017.7963639"},{"key":"ref3","first-page":"22","article-title":"Constrained policy optimization","author":"achiam","year":"2017","journal-title":"Proceedings of the 34th International Conference on Machine Learning-Volume 70"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-017-2053-3_3"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1972.1054753"},{"key":"ref8","first-page":"351","article-title":"Convergence of alternating optimization","volume":"11","author":"bezdek","year":"2003","journal-title":"Neural Parallel & Scientific Computations"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-89629-8_4"},{"key":"ref49","first-page":"262","author":"wolpert","year":"2006","journal-title":"Information Theory &#x2013; The Bridge Connecting Bounded Rational Game Theory and Statistical Physics"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1972.1054855"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1515\/9781400829460"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4419-1452-1_19"},{"key":"ref48","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1007\/BF00992696","article-title":"Simple statistical gradient-following algorithms for connectionist reinforcement learning","volume":"8","author":"williams","year":"1992","journal-title":"Machine Learning"},{"key":"ref47","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","article-title":"Q-learning","volume":"8","author":"watkins","year":"1992","journal-title":"Machine Learning"},{"key":"ref42","first-page":"1038","article-title":"Generalization in reinforcement learning: Successful examples using sparse coarse coding","author":"sutton","year":"1996","journal-title":"Advances in neural information processing systems"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.2307\/1884852"},{"key":"ref44","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Advances in neural information processing systems"},{"key":"ref43","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"}],"event":{"name":"2019 IEEE 58th Conference on Decision and Control (CDC)","location":"Nice, France","start":{"date-parts":[[2019,12,11]]},"end":{"date-parts":[[2019,12,13]]}},"container-title":["2019 IEEE 58th Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8977134\/9028853\/09029255.pdf?arnumber=9029255","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,19]],"date-time":"2022-07-19T20:19:42Z","timestamp":1658261982000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9029255\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":51,"URL":"https:\/\/doi.org\/10.1109\/cdc40024.2019.9029255","relation":{},"subject":[],"published":{"date-parts":[[2019,12]]}}}