{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,9]],"date-time":"2025-07-09T23:01:22Z","timestamp":1752102082713,"version":"3.37.3"},"reference-count":74,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2022]]},"DOI":"10.1109\/tpami.2022.3200726","type":"journal-article","created":{"date-parts":[[2022,8,22]],"date-time":"2022-08-22T19:58:19Z","timestamp":1661198299000},"page":"1-12","source":"Crossref","is-referenced-by-count":3,"title":["Information Optimization and Transferable State Abstractions in Deep Reinforcement Learning"],"prefix":"10.1109","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4625-233X","authenticated-orcid":false,"given":"Diego","family":"Gomez","sequence":"first","affiliation":[{"name":"Departmento de Ingenier&#x00ED;a El&#x00E9;ctrica y Electr&#x00F3;nica, Colombia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8688-3195","authenticated-orcid":false,"given":"Nicanor","family":"Quijano","sequence":"additional","affiliation":[{"name":"Departmento de Ingenier&#x00ED;a El&#x00E9;ctrica y Electr&#x00F3;nica, Colombia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2492-4422","authenticated-orcid":false,"given":"Luis Felipe","family":"Giraldo","sequence":"additional","affiliation":[{"name":"Departamento de Ingenier&#x00ED;a Biom&#x00E9;dica, Universidad de Los Andes, Bogot&#x00E1;, Colombia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1016\/j.anbehav.2003.12.005"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1016\/j.conb.2016.09.004"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1126\/science.1133687"},{"year":"2018","author":"Haarnoja","article-title":"Soft actor-critic algorithms and applications","key":"ref4"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1038\/nature14236"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1038\/nature24270"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1177\/0278364919887447"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1038\/nature14422"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1073\/pnas.1800755115"},{"key":"ref10","first-page":"1282","article-title":"Quantifying generalization in reinforcement learning","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Cobbe"},{"year":"2019","author":"Zhao","article-title":"Investigating generalisation in continuous deep reinforcement learning","key":"ref11"},{"key":"ref12","first-page":"1329","article-title":"Benchmarking deep reinforcement learning for continuous control","volume-title":"Proc. 33rd Int. Conf. Mach. Learn.","author":"Duan"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.1038\/s42256-020-00257-z"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1038\/nature06860"},{"key":"ref15","doi-asserted-by":"crossref","DOI":"10.1101\/621540","article-title":"Hierarchical recurrent state space models reveal discrete and continuous dynamics of neural activity in C. Elegans","author":"Linderman","year":"2019"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1038\/s41467-021-26751-5"},{"year":"2018","author":"Zaslavsky","article-title":"Efficient human-like semantic representations via the information bottleneck principle","key":"ref17"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1146\/annurev.ento.46.1.471"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1126\/science.298.5598.1569"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1016\/j.tics.2020.09.002"},{"year":"2020","author":"Davidson","article-title":"Investigating simple object representations in model-free deep reinforcement learning","key":"ref21"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1016\/S0004-3702(01)00110-2"},{"year":"2019","author":"Hessel","article-title":"On inductive biases in deep reinforcement learning","key":"ref23"},{"year":"2019","author":"Veerapaneni","article-title":"Entity abstraction in visual model-based reinforcement learning","key":"ref24"},{"key":"ref25","first-page":"724","article-title":"Unsupervised learning of object keypoints for perception and control","volume-title":"Proc. Adv. Int. Conf. Neural Inf. Process. Syst.","author":"Kulkarni"},{"key":"ref26","first-page":"14866","article-title":"Generating diverse high-fidelity images with VQ-VAE-2","author":"Razavi","year":"2019","journal-title":"Adv. Int. Conf. Neural Inf. Process. Syst."},{"doi-asserted-by":"publisher","key":"ref27","DOI":"10.1016\/S0004-3702(99)00052-1"},{"doi-asserted-by":"publisher","key":"ref28","DOI":"10.1609\/aaai.v31i1.10916"},{"volume-title":"Proc. Int. Conf. Learn. Representations","author":"Frans","article-title":"Meta learning shared hierarchies","key":"ref29"},{"volume-title":"Proc. Int. Conf. Learn. Representations","author":"Goyal","article-title":"Reinforcement learning with competitive ensembles of information-constrained primitives","key":"ref30"},{"year":"2020","author":"Hafner","article-title":"Mastering atari with discrete world models","key":"ref31"},{"doi-asserted-by":"publisher","key":"ref32","DOI":"10.1613\/jair.1.14174"},{"issue":"7","key":"ref33","first-page":"1633","article-title":"Transfer learning for reinforcement learning domains: A survey","volume":"10","author":"Taylor","year":"2009","journal-title":"J. Mach. Learn. Res."},{"doi-asserted-by":"publisher","key":"ref34","DOI":"10.1109\/tpami.2023.3292075"},{"doi-asserted-by":"publisher","key":"ref35","DOI":"10.1109\/TNNLS.2018.2890017"},{"key":"ref36","first-page":"5824","article-title":"Gradient surgery for multi-task learning","volume-title":"Proc. Adv. Int. Conf. Neural Inf. Process. Syst.","author":"Yu"},{"key":"ref37","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","author":"Finn"},{"year":"2018","author":"Nichol","article-title":"On first-order meta-learning algorithms","key":"ref38"},{"year":"2019","author":"Kostrikov","article-title":"Imitation learning via off-policy distribution matching","key":"ref39"},{"key":"ref40","first-page":"10445","article-title":"Learning abstract options","author":"Riemer","year":"2018","journal-title":"Adv. Int. Conf. Neural Inf. Process. Syst."},{"volume-title":"Proc. Int. Conf. Learn. Representations","author":"Eysenbach","article-title":"Diversity is all you need: Learning skills without a reward function","key":"ref41"},{"key":"ref42","first-page":"361","article-title":"Reinforcement learning with soft state aggregation","volume-title":"Proc. Adv. Int. Conf. Neural Inf. Process. Syst.","author":"Singh"},{"key":"ref43","first-page":"531","article-title":"Towards a unified theory of state abstraction for MDPs","volume-title":"Proc. 9th Int. Symp. Artif. Intell. Math.","author":"Li"},{"key":"ref44","first-page":"2915","article-title":"Near optimal behavior via approximate state abstraction","volume-title":"Proc. 33th Int. Conf. Mach. Learn.","author":"Abel"},{"volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zhang","article-title":"Learning invariant representations for reinforcement learning without reconstruction","key":"ref45"},{"doi-asserted-by":"publisher","key":"ref46","DOI":"10.1609\/aaai.v33i01.33013134"},{"volume-title":"Proc. Int. Conf. Mach. Learn. Workshop Struct. Knowl. Transfer Mach. Learn.","author":"Walsh","article-title":"Transferring state abstractions between MDPs","key":"ref47"},{"doi-asserted-by":"publisher","key":"ref48","DOI":"10.1111\/j.1745-6916.2008.00056.x"},{"issue":"3","key":"ref49","first-page":"265","article-title":"Eigenforms, interfaces and holographic encoding: Toward an evolutionary account of objects and spacetime","volume":"12","author":"Fields","year":"2017","journal-title":"Constructivist Found."},{"volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kingma","article-title":"Auto-encoding variational bayes","key":"ref50"},{"doi-asserted-by":"publisher","key":"ref51","DOI":"10.1109\/2.36"},{"doi-asserted-by":"publisher","key":"ref52","DOI":"10.1162\/neco.1992.4.6.863"},{"year":"2000","author":"Tishby","article-title":"The information bottleneck method","key":"ref53"},{"volume-title":"Proc. Int. Conf. Learn. Representations","author":"Alemi","article-title":"Deep variational information bottleneck","key":"ref54"},{"year":"2020","author":"Hafner","article-title":"Action and perception as divergence minimization","key":"ref55"},{"volume-title":"Proc. Int. Conf. Learn. Representations","author":"Goyal","article-title":"Infobot: Transfer and exploration via the information bottleneck","key":"ref56"},{"key":"ref57","first-page":"13 978","article-title":"Generalization in reinforcement learning with selective noise injection and information bottleneck","volume-title":"Proc. Adv. Int. Conf. Neural Inf. Process. Syst.","author":"Igl"},{"doi-asserted-by":"publisher","key":"ref58","DOI":"10.1007\/978-1-4419-1452-1_19"},{"doi-asserted-by":"publisher","key":"ref59","DOI":"10.1007\/978-3-642-24647-0_3"},{"year":"2020","author":"Asadi","article-title":"Learning state abstractions for transfer in continuous control","key":"ref60"},{"doi-asserted-by":"publisher","key":"ref61","DOI":"10.1109\/tnn.1998.712192"},{"volume-title":"Proc. 32nd Int. Conf. Mach. Learn.","author":"Schulman","article-title":"Trust region policy optimization","key":"ref62"},{"doi-asserted-by":"publisher","key":"ref63","DOI":"10.1109\/IROS.2012.6386109"},{"year":"2016","author":"Brockman","article-title":"OpenAI Gym","key":"ref64"},{"author":"Benjamins","article-title":"CARL: A benchmark for contextual and adaptive reinforcement learning","key":"ref65"},{"key":"ref66","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","volume-title":"Proc. 33rd Int. Conf. Mach. Learn.","author":"Wang"},{"volume-title":"Proc. Int. Conf. Learn. Representations","author":"Fortunato","article-title":"Noisy networks for exploration","key":"ref67"},{"volume-title":"Proc. Int. Conf. Learn. Representations","author":"Burda","article-title":"Exploration by random network distillation","key":"ref68"},{"doi-asserted-by":"publisher","key":"ref69","DOI":"10.1016\/j.cortex.2011.04.006"},{"key":"ref70","first-page":"4033","article-title":"Learning independent causal mechanisms","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","volume":"80","author":"Parascandolo"},{"year":"2017","author":"Heess","article-title":"Emergence of locomotion behaviours in rich environments","key":"ref71"},{"doi-asserted-by":"publisher","key":"ref72","DOI":"10.1038\/ncomms10838"},{"doi-asserted-by":"publisher","key":"ref73","DOI":"10.1111\/ejn.14839"},{"year":"2020","author":"Ajay","article-title":"OPAL: Offline primitive discovery for accelerating offline reinforcement learning","key":"ref74"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/4359286\/09864261.pdf?arnumber=9864261","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T12:18:10Z","timestamp":1706789890000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9864261\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"references-count":74,"URL":"https:\/\/doi.org\/10.1109\/tpami.2022.3200726","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"type":"print","value":"0162-8828"},{"type":"electronic","value":"2160-9292"},{"type":"electronic","value":"1939-3539"}],"subject":[],"published":{"date-parts":[[2022]]}}}