{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T07:02:37Z","timestamp":1773903757554,"version":"3.50.1"},"reference-count":48,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2023]]},"DOI":"10.1109\/access.2023.3331728","type":"journal-article","created":{"date-parts":[[2023,11,9]],"date-time":"2023-11-09T19:02:42Z","timestamp":1699556562000},"page":"126462-126481","source":"Crossref","is-referenced-by-count":6,"title":["CARL: A Synergistic Framework for Causal Reinforcement Learning"],"prefix":"10.1109","volume":"11","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2441-5265","authenticated-orcid":false,"given":"Arqu\u00edmides","family":"M\u00e9ndez-Molina","sequence":"first","affiliation":[{"name":"Instituto Nacional de Astrof&#x00ED;sica, &#x00D3;ptica y Electr&#x00F3;nica, San Andr&#x00E9;s Cholula, Puebla, Mexico"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7618-8762","authenticated-orcid":false,"given":"Eduardo F.","family":"Morales","sequence":"additional","affiliation":[{"name":"Instituto Nacional de Astrof&#x00ED;sica, &#x00D3;ptica y Electr&#x00F3;nica, San Andr&#x00E9;s Cholula, Puebla, Mexico"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3685-5567","authenticated-orcid":false,"given":"L. Enrique","family":"Sucar","sequence":"additional","affiliation":[{"name":"Instituto Nacional de Astrof&#x00ED;sica, &#x00D3;ptica y Electr&#x00F3;nica, San Andr&#x00E9;s Cholula, Puebla, Mexico"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref2","article-title":"Solving Rubik\u2019s cube with a robot hand","author":"Akkaya","year":"2019","journal-title":"arXiv:1910.07113"},{"key":"ref3","article-title":"Evaluating reinforcement learning algorithms in observational health settings","author":"Gottesman","year":"2018","journal-title":"arXiv:1805.12298"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511803161"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.3792824"},{"key":"ref6","article-title":"Sample-efficient reinforcement learning via counterfactual-based data augmentation","author":"Lu","year":"2020","journal-title":"arXiv:2012.09092"},{"key":"ref7","first-page":"2493","article-title":"Explainable reinforcement learning through a causal lens","volume-title":"Proc. 34th AAAI Conf. Artif. Intell., 32nd Innov. Appl. Artif. Intell. Conf., 10th AAAI Symp. Educ. Adv. Artif. Intell. (EAAI)","author":"Madumal"},{"key":"ref8","first-page":"9260","article-title":"Invariant action effect model for reinforcement learning","volume-title":"Proc. 36th AAAI Conf. Artif. Intell., 34th Conf. Innov. Appl. Artif. Intell., 12th Symp. Educ. Adv. Artif. Intell. (EAAI)","author":"Zhu"},{"issue":"3","key":"ref9","first-page":"95","article-title":"Causal based Q-learning","volume":"149","author":"M\u00e9ndez-Molina","year":"2020","journal-title":"Res. Comput. Sci."},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-89817-5_16"},{"key":"ref11","first-page":"421","article-title":"Causal discovery and reinforcement learning: A synergistic integration","volume-title":"Proc. Int. Conf. Probabilistic Graph. Models (PGM)","volume":"186","author":"M\u00e9ndez-Molina"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/tnn.1998.712192"},{"key":"ref13","article-title":"Causality-driven hierarchical structure discovery for reinforcement learning","author":"Peng","year":"2022","journal-title":"arXiv:2210.06964"},{"key":"ref14","article-title":"A survey on causal reinforcement learning","author":"Zeng","year":"2023","journal-title":"arXiv:2302.05209"},{"key":"ref15","first-page":"1342","article-title":"Bandits with unobserved confounders: A causal approach","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Bareinboim"},{"key":"ref16","first-page":"1156","article-title":"Counterfactual data-fusion for online reinforcement learners","volume-title":"Proc. 34th Int. Conf. Mach. Learn. (ICML)","volume":"70","author":"Forney"},{"key":"ref17","first-page":"3057","article-title":"Identifying best interventions through online importance sampling","volume-title":"Proc. 34th Int. Conf. Mach. Learn. (ICML)","volume":"70","author":"Sen"},{"key":"ref18","first-page":"5508","article-title":"Causal bandits with propagating inference","volume-title":"Proc. 35th Int. Conf. Mach. Learn. (ICML)","volume":"80","author":"Yabe"},{"key":"ref19","first-page":"1181","article-title":"Causal bandits: Learning good interventions via causal inference","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lattimore"},{"key":"ref20","first-page":"141","article-title":"Regret analysis of bandit problems with causal background knowledge","volume-title":"Proc. 36th Conf. Uncertainty Artif. Intell. (UAI)","volume":"124","author":"Lu"},{"key":"ref21","first-page":"22293","article-title":"Confounding-robust policy evaluation in infinitehorizon reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Kallus"},{"key":"ref22","article-title":"Markov decision processes with unobserved confounders: A causal approach","author":"Zhang","year":"2016"},{"key":"ref23","first-page":"1999","article-title":"Off-policy evaluation in infinite-horizon reinforcement learning with latent confounders","volume-title":"Proc. 24th Int. Conf. Artif. Intell. Statist. (AISTATS)","volume":"130","author":"Bennett"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/122344.122377"},{"key":"ref25","first-page":"465","article-title":"PILCO: A model-based and dataefficient approach to policy search","volume-title":"Proc. 28th Int. Conf. Mach. Learn. (ICML)","author":"Deisenroth"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref27","article-title":"Causal Markov decision processes: Learning good interventions efficiently","author":"Lu","year":"2021","journal-title":"arXiv:2102.07663"},{"key":"ref28","first-page":"24817","article-title":"Causal bandits with unknown graph structure","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Lu"},{"key":"ref29","article-title":"Causal induction from visual observations for goal directed tasks","author":"Nair","year":"2019","journal-title":"arXiv:1910.01751"},{"key":"ref30","first-page":"1809","article-title":"Schema networks: Zero-shot transfer with a generative causal model of intuitive physics","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","author":"Kansky"},{"key":"ref31","first-page":"9848","article-title":"Causal curiosity: RL agents discovering self-supervised experiments for causal representation learning","volume-title":"Proc. 38th Int. Conf. Mach. Learn. (ICML)","volume":"139","author":"Sontakke"},{"key":"ref32","volume-title":"Reichenbach\u2019s Common Cause Principle","author":"Arntzenius","year":"2010"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007694015589"},{"key":"ref34","first-page":"15","article-title":"Scaling up reinforcement learning with a relational representation","volume-title":"Proc. Workshop Adaptability Multi-Agent Syst.","author":"Morales"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref36","article-title":"Playing Atari with deep reinforcement learning","volume-title":"arXiv:1312.5602","author":"Mnih"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i8.26109"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4613-0303-9_33"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.18637\/jss.v035.i03"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/b978-1-55860-141-3.50030-4"},{"key":"ref41","first-page":"1104","article-title":"Exploiting structure in policy construction","volume-title":"Proc. IJCAI","volume":"14","author":"Boutilier"},{"issue":"1","key":"ref42","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1613\/jair.639","article-title":"Hierarchical reinforcement learning with the MAXQ value function decomposition","volume":"13","author":"Dietterich","year":"2000","journal-title":"J. Artif. Intell. Res."},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref44","first-page":"2094","article-title":"Deep reinforcement learning with double Q-learning","volume-title":"Proc. 30th AAAI Conf. Artif. Intell.","author":"van Hasselt"},{"key":"ref45","first-page":"3215","article-title":"Rainbow: Combining improvements in deep reinforcement learning","volume-title":"Proc. 32nd AAAI Conf. Artif. Intell., 30th Innov. Appl. Artif. Intell. (IAAI), 8th AAAI Symp. Educ. Adv. Artif. Intell. (EAAI)","author":"Hessel"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.3390\/drones6110365"},{"issue":"274","key":"ref47","first-page":"1","article-title":"CleanRL: High-quality single-file implementations of deep reinforcement learning algorithms","volume":"23","author":"Huang","year":"2022","journal-title":"J. Mach. Learn. Res."},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1017\/cbo9781139381772.004"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/10005208\/10314482.pdf?arnumber=10314482","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,2]],"date-time":"2024-03-02T17:10:59Z","timestamp":1709399459000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10314482\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"references-count":48,"URL":"https:\/\/doi.org\/10.1109\/access.2023.3331728","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]}}}