{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T15:51:55Z","timestamp":1759333915710,"version":"build-2065373602"},"reference-count":26,"publisher":"Springer Science and Business Media LLC","issue":"29","license":[{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s00521-024-10540-4","type":"journal-article","created":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T12:51:11Z","timestamp":1732884671000},"page":"23709-23721","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Integration of causal inference in the DQN sampling process for classical control problems"],"prefix":"10.1007","volume":"37","author":[{"given":"Jairo Ivan","family":"Velez Bedoya","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Manuel","family":"Gonzalez Bedia","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Luis Fernando","family":"Castillo Ossa","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jeferson","family":"Arango Lopez","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0816-1445","authenticated-orcid":false,"given":"Fernando","family":"Moreira","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,29]]},"reference":[{"issue":"6973","key":"10540_CR1","doi-asserted-by":"publisher","first-page":"170","DOI":"10.1136\/bmj.310.6973.170","volume":"310","author":"JM Bland","year":"1995","unstructured":"Bland JM, Altman DG (1995) Multiple significance tests: the Bonferroni method. Bmj 310(6973):170","journal-title":"Bmj"},{"issue":"11","key":"10540_CR2","first-page":"3207","volume":"14","author":"L Bottou","year":"2013","unstructured":"Bottou L, Peters J, Qui\u00f1onero-Candela J et al (2013) Counterfactual reasoning and learning systems: the example of computational advertising. J Mach Learn Res 14(11):3207","journal-title":"J Mach Learn Res"},{"key":"10540_CR3","unstructured":"Brockman G, Cheung V, Pettersson L etal .(2016) OpenAI Gym. arXiv preprint arXiv:1606.01540 1\u20134. arXiv:https:\/\/arxiv.org\/abs\/arXiv:1606.01540v1arXiv:arXiv:1606.01540v1"},{"issue":"1","key":"10540_CR4","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1111\/j.1467-6419.2007.00527.x","volume":"22","author":"M Caliendo","year":"2008","unstructured":"Caliendo M, Kopeinig S (2008) Some practical guidance for the implementation of propensity score matching. J Econ Surv 22(1):31\u201372","journal-title":"J Econ Surv"},{"issue":"3","key":"10540_CR5","doi-asserted-by":"publisher","first-page":"1659","DOI":"10.1109\/COMST.2021.3073036","volume":"23","author":"W Chen","year":"2021","unstructured":"Chen W, Qiu X, Cai T et al (2021) Deep reinforcement learning for internet of things: a comprehensive survey. IEEE Commun Surv Tutor 23(3):1659\u20131692","journal-title":"IEEE Commun Surv Tutor"},{"issue":"1995","key":"10540_CR6","first-page":"1","volume":"120","author":"J Fan","year":"2020","unstructured":"Fan J, Wang Z, Xie Y et al (2020) A theoretical analysis of deep Q-learning. Learn Dyn Control 120(1995):1\u20134","journal-title":"Learn Dyn Control"},{"key":"10540_CR7","unstructured":"Guts Y (2023) cartpole-q-learning. https:\/\/github.com\/YuriyGuts\/cartpole-q-learning"},{"key":"10540_CR8","first-page":"135","volume-title":"Deep Q-Networks","author":"Y Huang","year":"2020","unstructured":"Huang Y (2020) Deep Q-Networks. Springer Singapore, Singapore, pp 135\u2013160"},{"key":"10540_CR9","unstructured":"Jaderberg M, Mnih V, Czarnecki WM, et\u00a0al. (2016) Reinforcement learning with unsupervised auxiliary tasks. arXiv:1611.05397"},{"key":"10540_CR10","unstructured":"Laskin M, Yarats D, Liu H, etal. (2021) Urlb: Unsupervised reinforcement learning benchmark. arXiv:2110.15191"},{"key":"10540_CR11","doi-asserted-by":"crossref","unstructured":"Liu R, Zou J (2018) The effects of memory replay in reinforcement learning. In: 2018 56th annual allerton conference on communication, control, and computing (Allerton), IEEE, pp 478\u2013485","DOI":"10.1109\/ALLERTON.2018.8636075"},{"issue":"7540","key":"10540_CR12","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533. https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"key":"10540_CR13","unstructured":"Mnih, Volodymyr Kavukcuoglu K, Silver D, Graves A, et\u00a0al (2013) Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 1\u20139. arXiv:https:\/\/arxiv.org\/abs\/arXiv:1312.5602v1arXiv:arXiv:1312.5602v1"},{"key":"10540_CR14","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1023\/A:1022635613229","volume":"13","author":"AW Moore","year":"1993","unstructured":"Moore AW, Atkeson CG (1993) Prioritized sweeping: reinforcement learning with less data and less time. Mach Learn 13:103\u2013130","journal-title":"Mach Learn"},{"key":"10540_CR15","unstructured":"Shao K, Tang Z, Zhu Y, etal. (2019) A survey of deep reinforcement learning in video games. arXiv preprint arXiv:1912.10944"},{"key":"10540_CR16","volume-title":"Reinforcement learning: An introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: An introduction. MIT press, Cambridge"},{"key":"10540_CR17","doi-asserted-by":"crossref","unstructured":"Tai, Lei and Liu, Ming (2016) A robot exploration strategy based on q-learning network. In: 2016 IEEE international conference on real-time computing and robotics (rcar), IEEE, 57\u201362","DOI":"10.1109\/RCAR.2016.7784001"},{"key":"10540_CR18","doi-asserted-by":"crossref","unstructured":"Tessler C, Givony S, Zahavy T et\u00a0al (2017) A deep hierarchical approach to lifelong learning in minecraft. In: Proceedings of the AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v31i1.10744"},{"issue":"1","key":"10540_CR19","first-page":"103","volume":"21","author":"S Thrun","year":"2000","unstructured":"Thrun S, Littman ML (2000) Reinforcement learning: an introduction. AI Mag 21(1):103\u2013103","journal-title":"AI Mag"},{"key":"10540_CR20","doi-asserted-by":"crossref","unstructured":"Van Hasselt H, Guez A, Silver D (2016) Deep reinforcement learning with double q-learning. In: Proceedings of the AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"10540_CR21","unstructured":"Van Seijen H, Sutton RS (2013) Efficient planning in MDPs by small backups. In: Proc. 30th Int. Conf. Mach. Learn, 1\u20139"},{"issue":"10","key":"10540_CR22","doi-asserted-by":"publisher","first-page":"1484","DOI":"10.1016\/j.neunet.2009.05.011","volume":"22","author":"P Wawrzy\u0144ski","year":"2009","unstructured":"Wawrzy\u0144ski P (2009) Real-time reinforcement learning by sequential actor-critics and experience replay. Neural Netw 22(10):1484\u20131497","journal-title":"Neural Netw"},{"key":"10540_CR23","unstructured":"Wunder M, Littman M, Babes M (2010) Classes of multiagent Q-learning dynamics with epsilon-greedy exploration. In: Proceedings of the 27th international conference on machine learning (ICML-10), 1167\u20131174"},{"key":"10540_CR24","doi-asserted-by":"publisher","unstructured":"Yang CHH, Hung ITD, Ouyang Y, et\u00a0al (2022) Training a Resilient Q-network against Observational Interference. Proceedings of the 36th AAAI Conference on Artificial Intelligence, AAAI 2022 36:8814\u20138822. https:\/\/doi.org\/10.1609\/aaai.v36i8.20862, arXiv:https:\/\/arxiv.org\/abs\/2102.09677arXiv:2102.09677","DOI":"10.1609\/aaai.v36i8.20862"},{"key":"10540_CR25","unstructured":"Zhang S, Sutton RS (2017) A deeper look at experience replay. arXiv preprint arXiv:https:\/\/arxiv.org\/abs\/1712.01275arXiv:1712.01275"},{"key":"10540_CR26","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2021.102193","volume":"73","author":"SK Zhou","year":"2021","unstructured":"Zhou SK, Le HN, Luu K et al (2021) Deep reinforcement learning in medical imaging: a literature review. Medical Image Anal 73:102193","journal-title":"Medical Image Anal"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-10540-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-024-10540-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-10540-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T05:23:19Z","timestamp":1759209799000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-024-10540-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,29]]},"references-count":26,"journal-issue":{"issue":"29","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["10540"],"URL":"https:\/\/doi.org\/10.1007\/s00521-024-10540-4","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2024,11,29]]},"assertion":[{"value":"12 October 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 October 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}