{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T03:34:31Z","timestamp":1771558471438,"version":"3.50.1"},"reference-count":46,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61806217"],"award-info":[{"award-number":["61806217"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1109\/tnnls.2024.3376538","type":"journal-article","created":{"date-parts":[[2024,3,20]],"date-time":"2024-03-20T18:23:43Z","timestamp":1710959023000},"page":"4834-4848","source":"Crossref","is-referenced-by-count":2,"title":["MEOL: A Maximum-Entropy Framework for Options Learning"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4730-7048","authenticated-orcid":false,"given":"Pin","family":"Zhang","sequence":"first","affiliation":[{"name":"College of Aeronautics Engineering, Air Force Engineering University, Xi&#x2019;an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenhan","family":"Dong","sequence":"additional","affiliation":[{"name":"College of Aeronautics Engineering, Air Force Engineering University, Xi&#x2019;an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6039-5214","authenticated-orcid":false,"given":"Ming","family":"Cai","sequence":"additional","affiliation":[{"name":"College of Aeronautics Engineering, Air Force Engineering University, Xi&#x2019;an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5334-3280","authenticated-orcid":false,"given":"Shengde","family":"Jia","sequence":"additional","affiliation":[{"name":"College of Mechatronic Engineering and Automation, National University of Defense Technology, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6269-789X","authenticated-orcid":false,"given":"Zi-Peng","family":"Wang","sequence":"additional","affiliation":[{"name":"Faculty of Information Technology, Beijing Laboratory of Smart Environmental Protection, Beijing Key Laboratory of Computational Intelligence and Intelligent System, and Beijing Institute of Artificial Intelligence, Beijing University of Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"271","article-title":"Feudal reinforcement learning","volume-title":"Proc. 5th Int. Conf. Neural Inf. Process. Syst.","author":"Dayan"},{"key":"ref2","first-page":"118","article-title":"The maxq method for hierarchical reinforcement learning","volume-title":"Proc. 15th Int. Conf. Mach. Learn.","author":"Dietterich"},{"key":"ref3","first-page":"1043","article-title":"Reinforcement learning with hierarchies of machines","volume-title":"Proc. NIPS","author":"Parr"},{"issue":"1","key":"ref4","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","article-title":"Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning","volume":"112","author":"Sutton","year":"1999","journal-title":"Artif. Intell."},{"key":"ref5","first-page":"1015","article-title":"Skill discovery in continuous reinforcement learning domains using skill chaining","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"22","author":"Konidaris"},{"key":"ref6","first-page":"531","article-title":"Td models: Modeling the world at a mixture of time scales","volume-title":"Proc. 12th Int. Conf. Int. Conf. Mach. Learn.","author":"Sutton"},{"key":"ref7","first-page":"361","article-title":"Automatic discovery of subgoals in reinforcement learning using diverse density","volume-title":"Proc. 18th Int. Conf. Mach. Learn.","author":"McGovern"},{"key":"ref8","first-page":"212","article-title":"Learning options in reinforcement learning","volume-title":"Proc. 5th Int. Symp. Abstraction, Reformulation Approximation","author":"Stolle"},{"key":"ref9","first-page":"1497","article-title":"Skill characterization based on betweenness","volume-title":"Proc. 21st Int. Conf. Neural Inf. Process. Syst.","author":"\u015eim\u015fek"},{"key":"ref10","article-title":"Temporal representation learning","author":"Bacon","year":"2018"},{"key":"ref11","first-page":"393","article-title":"Reinforcement learning methods for continuous-time Markov decision problems","volume-title":"Proc. 7th Int. Conf. Neural Inf. Process. Syst.","author":"Bradtke"},{"key":"ref12","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"Puterman","year":"1994"},{"key":"ref13","article-title":"Hierarchical control and learning for Markov decision processes","author":"Parr","year":"1998"},{"key":"ref14","first-page":"556","article-title":"Intra-option learning about temporally abstract actions","volume-title":"Proc. 15th Int. Conf. Mach. Learn.","author":"Sutton"},{"issue":"1","key":"ref15","first-page":"1726","article-title":"The option-critic architecture","volume-title":"Proc. AAAI Conf. Artif. Intell.","volume":"31","author":"Bacon"},{"key":"ref16","first-page":"II\u20131358","article-title":"Time-regularized interrupting options","volume-title":"Proc. 31st Int. Conf. Int. Conf. Mach. Learn.","author":"Mankowitz"},{"key":"ref17","first-page":"1","article-title":"When waiting is not an option: Learning options with a deliberation cost","volume-title":"Proc. 32nd AAAI Conf. Artif. Intell. 13th Innov. Appl. Artif. Intell. Conf. 8th AAAI Symp. Educ. Adv. Artif. Intell.","author":"Harb"},{"key":"ref18","volume-title":"DAC: The Double Actor-Critic Architecture for Learning Options","author":"Zhang","year":"2019"},{"key":"ref19","first-page":"4703","article-title":"An inference-based policy gradient method for learning options","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","volume":"80","author":"Smith"},{"key":"ref20","first-page":"1","article-title":"Learning options with interest functions","volume-title":"Proc. 32rd AAAI Conf. Artif. Intell. 31st Innov. Appl. Artif. Intell. Conf. 9th AAAI Symp. Educ. Adv. Artif. Intell.","author":"Khetarpal"},{"key":"ref21","first-page":"4632","article-title":"Flexible option learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Klissarov"},{"key":"ref22","first-page":"1255","article-title":"Modeling interaction via the principle of maximum causal entropy","volume-title":"Proc. 27th Int. Conf. Int. Conf. Mach. Learn.","author":"Ziebart"},{"key":"ref23","first-page":"1352","article-title":"Reinforcement learning with deep energy-based policies","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref24","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref25","article-title":"Reinforcement learning and control as probabilistic inference: Tutorial and review","author":"Levine","year":"2018","journal-title":"arXiv:1805.00909"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-6699-3"},{"key":"ref27","first-page":"1329","article-title":"Benchmarking deep reinforcement learning for continuous control","volume-title":"Proc. 33rd Int. Conf. Int. Conf. Mach. Learn.","author":"Duan"},{"key":"ref28","article-title":"Learnings options end-to-end for continuous action tasks","author":"Klissarov","year":"2017","journal-title":"arXiv:1712.00004"},{"key":"ref29","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref30","article-title":"Diversity-enriched option-critic","author":"Kamat","year":"2020","journal-title":"arXiv:2011.02565"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2017.2654324"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3213566"},{"issue":"1","key":"ref33","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1007\/s10462-021-10045-9","article-title":"Sparse online kernelized actor-critic learning in reproducing kernel Hilbert space","volume":"55","author":"Yang","year":"2022","journal-title":"Artif. Intell. Rev."},{"issue":"10","key":"ref34","doi-asserted-by":"crossref","first-page":"2699","DOI":"10.1016\/j.automatica.2012.06.096","article-title":"Computational adaptive optimal control for continuous-time linear systems with completely unknown dynamics","volume":"48","author":"Jiang","year":"2012","journal-title":"Automatica"},{"key":"ref35","doi-asserted-by":"crossref","first-page":"144","DOI":"10.1016\/j.automatica.2016.12.009","article-title":"H\u221e control of linear discrete-time systems: Off-policy reinforcement learning","volume":"78","author":"Kiumarsi","year":"2017","journal-title":"Automatica"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3098985"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2008.4739438"},{"key":"ref38","first-page":"9","article-title":"Planning by probabilistic inference","volume-title":"Proc. 9th Int. Workshop Artif. Intell. Stat.","volume":"R4","author":"Attias"},{"key":"ref39","first-page":"945","article-title":"Probabilistic inference for solving discrete and continuous state Markov decision processes","volume-title":"Proc. 23rd Int. Conf. Mach. Learn. (ICML)","author":"Toussaint"},{"issue":"1","key":"ref40","doi-asserted-by":"crossref","first-page":"164","DOI":"10.1214\/aoms\/1177697196","article-title":"A maximization technique occurring in the statistical analysis of probabilistic functions of Markov chains","volume":"41","author":"Baum","year":"1970","journal-title":"Ann. Math. Statist."},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3055499"},{"issue":"2","key":"ref42","doi-asserted-by":"crossref","first-page":"337","DOI":"10.1007\/s10994-016-5580-x","article-title":"Probabilistic inference for determining options in reinforcement learning","volume":"104","author":"Daniel","year":"2016","journal-title":"Mach. Learn."},{"key":"ref43","article-title":"Playing Atari with deep reinforcement learning","author":"Mnih","year":"2013","journal-title":"arXiv:1312.5602"},{"key":"ref44","doi-asserted-by":"crossref","DOI":"10.1109\/TNN.1998.712192","volume-title":"Introduction to Reinforcement Learning","author":"Sutton","year":"1998"},{"issue":"1","key":"ref45","first-page":"1","article-title":"Deep reinforcement learning with double q-learning","volume-title":"Proc. AAAI Conf. Artif. Intell.","volume":"30","author":"Van Hasselt"},{"key":"ref46","volume-title":"Garage: A Toolkit for Reproducible Reinforcement Learning Research","author":"Garage Contributors","year":"2019"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/10908444\/10476495.pdf?arnumber=10476495","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T05:50:53Z","timestamp":1740808253000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10476495\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3]]},"references-count":46,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2024.3376538","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3]]}}}