{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T11:00:25Z","timestamp":1761562825930,"version":"3.40.3"},"publisher-location":"Cham","reference-count":37,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030898168"},{"type":"electronic","value":"9783030898175"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-89817-5_16","type":"book-chapter","created":{"date-parts":[[2021,10,21]],"date-time":"2021-10-21T23:13:18Z","timestamp":1634857998000},"page":"213-227","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Causal Based Action Selection Policy for Reinforcement Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2117-5654","authenticated-orcid":false,"given":"Ivan","family":"Feliciano-Avelino","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2441-5265","authenticated-orcid":false,"given":"Arqu\u00edmides","family":"M\u00e9ndez-Molina","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7618-8762","authenticated-orcid":false,"given":"Eduardo F.","family":"Morales","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3685-5567","authenticated-orcid":false,"given":"L. Enrique","family":"Sucar","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,10,21]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Abel, D., et al.: Goal-based action priors. In: Proceedings of the International Conference on Automated Planning and Scheduling, vol. 25 (2015)","key":"16_CR1","DOI":"10.1609\/icaps.v25i1.13697"},{"unstructured":"Akkaya, I., et al.: Solving rubik\u2019s cube with a robot hand. arXiv preprint arXiv:1910.07113 (2019)","key":"16_CR2"},{"doi-asserted-by":"crossref","unstructured":"Arulkumaran, K., Deisenroth, M.P., Brundage, M., Bharath, A.A.: A brief survey of deep reinforcement learning. arXiv preprint arXiv:1708.05866 (2017)","key":"16_CR3","DOI":"10.1109\/MSP.2017.2743240"},{"unstructured":"Bareinboim, E., Forney, A., Pearl, J.: Bandits with unobserved confounders: A causal approach. In: Conference on Neural Information Processing Systems, pp. 1342\u20131350 (2015)","key":"16_CR4"},{"unstructured":"Beyret, B., Hern\u00e1ndez-Orallo, J., Cheke, L., Halina, M., Shanahan, M., Crosby, M.: The animal-AI environment: training and testing animal-like artificial cognition (2019)","key":"16_CR5"},{"unstructured":"Brockman, G., et al.: OpenAI gym (2016)","key":"16_CR6"},{"unstructured":"Campbell, D.T., Cook, T.D.: Quasi-experimentation: Design & Analysis Issues for Field Settings. Rand McNally College Publishing Company, Chicago (1979)","key":"16_CR7"},{"unstructured":"Chalupka, K., Perona, P., Eberhardt, F.: Visual causal feature learning. In: Proceedings of the Thirty-First Conference on Uncertainty in Artificial Intelligence (UAI 2015), pp. 181\u2013190. AUAI Press, Arlington, Virginia (2015)","key":"16_CR8"},{"unstructured":"Dasgupta, I., et al.: Causal reasoning from meta-reinforcement learning. CoRR abs\/1901.08162 (2019). http:\/\/arxiv.org\/abs\/1901.08162","key":"16_CR9"},{"unstructured":"Everitt, T., Hutter, M.: Reward tampering problems and solutions in reinforcement learning: a causal influence diagram perspective. CoRR abs\/1908.04734 (2019). http:\/\/arxiv.org\/abs\/1908.04734","key":"16_CR10"},{"unstructured":"Geibel, P.: Reinforcement learning with bounded risk. In: Proceedings of the Eighteenth International Conference on Machine Learning, pp. 162\u2013169. Morgan Kaufmann (2001)","key":"16_CR11"},{"doi-asserted-by":"crossref","unstructured":"Gershman, S.J.: Reinforcement Learning and Causal Models. The Oxford handbook of causal reasoning, p. 295 (2017)","key":"16_CR12","DOI":"10.1093\/oxfordhb\/9780199399550.013.20"},{"unstructured":"Gonzalez-Soto, M., Sucar, L.E., Escalante, H.J.: Playing against nature: causal discovery for decision making under uncertainty. CoRR abs\/1807.01268 (2018). http:\/\/arxiv.org\/abs\/1807.01268","key":"16_CR13"},{"unstructured":"Goodfellow, I., Bengio, Y., Courville, A.: Deep Learning. MIT Press, Cambridge(2016)","key":"16_CR14"},{"unstructured":"Gottesman, O., et al.: Evaluating reinforcement learning algorithms in observational health settings (2018)","key":"16_CR15"},{"unstructured":"Hafner, D., Lillicrap, T.P., Ba, J., Norouzi, M.: Dream to control: learning behaviors by latent imagination. In: 8th International Conference on Learning Representations (ICLR). OpenReview.net (2020). https:\/\/openreview.net\/forum?id=S1lOTC4tDS","key":"16_CR16"},{"key":"16_CR17","volume-title":"The Stanford Encyclopedia of Philosophy","author":"C Hitchcock","year":"2019","unstructured":"Hitchcock, C.: Causal models. In: Zalta, E.N. (ed.) The Stanford Encyclopedia of Philosophy. Stanford University, Metaphysics Research Lab (2019)"},{"unstructured":"Ho, S.: Causal learning versus reinforcement learning for knowledge learning and problem solving. In: Workshops of the The Thirty-First AAAI Conference on Artificial Intelligence. AAAI Workshops, vol. WS-17. AAAI Press (2017)","key":"16_CR18"},{"unstructured":"Lattimore, F., Lattimore, T., Reid, M.D.: Causal bandits: Learning good interventions via causal inference. In: Advances in Neural Information Processing Systems, pp. 1181\u20131189 (2016)","key":"16_CR19"},{"unstructured":"Lu, C., Sch\u00f6lkopf, B., Hern\u00e1ndez-Lobato, J.M.: Deconfounding reinforcement learning in observational settings. CoRR abs\/1812.10576 (2018), http:\/\/arxiv.org\/abs\/1812.10576","key":"16_CR20"},{"doi-asserted-by":"crossref","unstructured":"Madumal, P., Miller, T., Sonenberg, L., Vetere, F.: Explainable reinforcement learning through a causal lens. arXiv preprint arXiv:1905.10958 (2019)","key":"16_CR21","DOI":"10.1609\/aaai.v34i03.5631"},{"unstructured":"Mazumder, S., et al.: Guided exploration in deep reinforcement learning (2019). https:\/\/openreview.net\/forum?id=SJMeTo09YQ","key":"16_CR22"},{"unstructured":"McFarlane, R.: A Survey of Exploration Strategies in Reinforcement Learning. McGill University (2018). http:\/\/www.cs.mcgill.ca\/cs526\/roger.pdf","key":"16_CR23"},{"unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)","key":"16_CR24"},{"doi-asserted-by":"crossref","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","key":"16_CR25","DOI":"10.1038\/nature14236"},{"doi-asserted-by":"crossref","unstructured":"Nair, A., McGrew, B., Andrychowicz, M., Zaremba, W., Abbeel, P.: Overcoming exploration in reinforcement learning with demonstrations (2017)","key":"16_CR26","DOI":"10.1109\/ICRA.2018.8463162"},{"unstructured":"Nair, S., Zhu, Y., Savarese, S., Fei-Fei, L.: Causal induction from visual observations for goal directed tasks. arXiv preprint arXiv:1910.01751 (2019)","key":"16_CR27"},{"doi-asserted-by":"crossref","unstructured":"Pearl, J.: Causality: Models, Reasoning, and Interference. Cambridge University Press, Cambridge (2009)","key":"16_CR28","DOI":"10.1017\/CBO9780511803161"},{"unstructured":"Rezende, D.J., et al.: Causally correct partial models for reinforcement learning. CoRR abs\/2002.02836 (2020). https:\/\/arxiv.org\/abs\/2002.02836","key":"16_CR29"},{"unstructured":"Saunders, W., Sastry, G., Stuhlm\u00fcller, A., Evans, O.: Trial without error: towards safe reinforcement learning via human intervention. In: Andr\u00e9, E., Koenig, S., Dastani, M., Sukthankar, G. (eds.) Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems (AAMAS), pp. 2067\u20132069. ACM (2018)","key":"16_CR30"},{"unstructured":"Sen, R., Shanmugam, K., Dimakis, A.G., Shakkottai, S.: Identifying best interventions through online importance sampling. In: Proceedings of the 34th International Conference on Machine Learning, vol. 70, pp. 3057\u20133066. JMLR. org (2017)","key":"16_CR31"},{"unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. The MIT Press, Cambridge (2018)","key":"16_CR32"},{"doi-asserted-by":"crossref","unstructured":"Vinyals, O., et al.: Grandmaster level in Starcraft III using multi-agent reinforcement learning. Nature 575, 350\u2013354 (2019)","key":"16_CR33","DOI":"10.1038\/s41586-019-1724-z"},{"issue":"3\u20134","key":"16_CR34","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins, C.J., Dayan, P.: Q-learning. Mach. Learn. 8(3\u20134), 279\u2013292 (1992)","journal-title":"Mach. Learn."},{"doi-asserted-by":"crossref","unstructured":"Woodward, J.: Making Things Happen: A Theory of Causal Explanation. Oxford University Press, Oxford (2005)","key":"16_CR35","DOI":"10.1093\/0195155270.001.0001"},{"unstructured":"Woodward, J.: Causation and manipulability. In: Zalta, E.N. (ed.) The Stanford Encyclopedia of Philosophy. Metaphysics Research Lab, Stanford University, winter 2016 edn. (2016)","key":"16_CR36"},{"doi-asserted-by":"crossref","unstructured":"Zhang, J., Bareinboim, E.: Transfer learning in multi-armed bandit: a causal approach. In: Proceedings of the 16th Conference on Autonomous Agents and MultiAgent Systems, pp. 1778\u20131780 (2017)","key":"16_CR37","DOI":"10.24963\/ijcai.2017\/186"}],"container-title":["Lecture Notes in Computer Science","Advances in Computational Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-89817-5_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T20:03:25Z","timestamp":1710360205000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-89817-5_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030898168","9783030898175"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-89817-5_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"21 October 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MICAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Mexican International Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 October 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 October 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"micai2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.micai.org\/2021\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"129","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"58","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"45% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}