{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:09:47Z","timestamp":1750219787452,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":16,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,1,4]],"date-time":"2023-01-04T00:00:00Z","timestamp":1672790400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,1,4]]},"DOI":"10.1145\/3570991.3571031","type":"proceedings-article","created":{"date-parts":[[2023,1,5]],"date-time":"2023-01-05T04:13:03Z","timestamp":1672891983000},"page":"108-112","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Using Contrastive Samples for Identifying and Leveraging Possible Causal Relationships in Reinforcement Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3601-778X","authenticated-orcid":false,"given":"Harshad","family":"Khadilkar","sequence":"first","affiliation":[{"name":"Tata Consultancy Services Ltd, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9014-1098","authenticated-orcid":false,"given":"Hardik","family":"Meisheri","sequence":"additional","affiliation":[{"name":"Tata Consultancy Services Ltd, India"}]}],"member":"320","published-online":{"date-parts":[[2023,1,4]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Hindsight experience replay. Advances in neural information processing systems 30","author":"Andrychowicz Marcin","year":"2017","unstructured":"Marcin Andrychowicz, Filip Wolski, Alex Ray, Jonas Schneider, Rachel Fong, Peter Welinder, Bob McGrew, Josh Tobin, OpenAI Pieter\u00a0Abbeel, and Wojciech Zaremba. 2017. Hindsight experience replay. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_2_1","volume-title":"International Conference on Learning Representations.","author":"Badia Adri\u00e0\u00a0Puigdom\u00e8nech","year":"2020","unstructured":"Adri\u00e0\u00a0Puigdom\u00e8nech Badia, Pablo Sprechmann, Alex Vitvitskyi, Daniel Guo, Bilal Piot, Steven Kapturowski, Olivier Tieleman, Mart\u00edn Arjovsky, Alexander Pritzel, Andew Bolt, 2020. Never give up: Learning directed exploration strategies. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_3_1","unstructured":"Maxime Chevalier-Boisvert Lucas Willems and Suman Pal. 2018. Minimalistic Gridworld Environment for OpenAI Gym. https:\/\/github.com\/maximecb\/gym-minigrid"},{"key":"e_1_3_2_1_4_1","unstructured":"Ishita Dasgupta Jane Wang Silvia Chiappa Jovana Mitrovic Pedro Ortega David Raposo Edward Hughes Peter Battaglia Matthew Botvinick and Zeb Kurth-Nelson. 2019. Causal reasoning from meta-reinforcement learning. arXiv preprint arXiv:1901.08162(2019)."},{"key":"e_1_3_2_1_5_1","unstructured":"Maxime Gasse Damien Grasset Guillaume Gaudron and Pierre-Yves Oudeyer. 2021. Causal reinforcement learning using observational and interventional data. arXiv preprint arXiv:2106.14421(2021)."},{"key":"e_1_3_2_1_6_1","volume-title":"Reinforcement learning and causal models. The Oxford handbook of causal reasoning 295","author":"Gershman J","year":"2017","unstructured":"Samuel\u00a0J Gershman. 2017. Reinforcement learning and causal models. The Oxford handbook of causal reasoning 295 (2017)."},{"key":"e_1_3_2_1_7_1","unstructured":"St\u00a0John Grimbly Jonathan Shock and Arnu Pretorius. 2021. Causal Multi-Agent Reinforcement Learning: Review and Open Problems. arXiv preprint arXiv:2111.06721(2021)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2009.33"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i03.5631"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.23919\/CCC50068.2020.9189606"},{"key":"e_1_3_2_1_11_1","volume-title":"Human-level control through deep reinforcement learning. Nature 518, 7540","author":"Mnih Volodymyr","year":"2015","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Andrei\u00a0A Rusu, Joel Veness, Marc\u00a0G Bellemare, Alex Graves, Martin Riedmiller, Andreas\u00a0K Fidjeland, Georg Ostrovski, 2015. Human-level control through deep reinforcement learning. Nature 518, 7540 (2015), 529\u2013533."},{"key":"e_1_3_2_1_12_1","first-page":"15946","article-title":"Contrastive Reinforcement Learning of Symbolic Reasoning Domains","volume":"34","author":"Poesia Gabriel","year":"2021","unstructured":"Gabriel Poesia, WenXin Dong, and Noah Goodman. 2021. Contrastive Reinforcement Learning of Symbolic Reasoning Domains. Advances in Neural Information Processing Systems 34 (2021), 15946\u201315956.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_13_1","unstructured":"Tom Schaul John Quan Ioannis Antonoglou and David Silver. 2015. Prioritized experience replay. arXiv preprint arXiv:1511.05952(2015)."},{"key":"e_1_3_2_1_14_1","first-page":"22905","article-title":"Causal influence detection for improving efficiency in reinforcement learning","volume":"34","author":"Seitzer Maximilian","year":"2021","unstructured":"Maximilian Seitzer, Bernhard Sch\u00f6lkopf, and Georg Martius. 2021. Causal influence detection for improving efficiency in reinforcement learning. Advances in Neural Information Processing Systems 34 (2021), 22905\u201322918.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_15_1","unstructured":"Shiv Shankar Vihari Piratla Soumen Chakrabarti Siddhartha Chaudhuri Preethi Jyothi and Sunita Sarawagi. 2018. Generalizing across domains via cross-gradient training. arXiv preprint arXiv:1804.10745(2018)."},{"volume-title":"Temporal credit assignment in reinforcement learning","author":"Sutton Richard\u00a0Stuart","key":"e_1_3_2_1_16_1","unstructured":"Richard\u00a0Stuart Sutton. 1984. Temporal credit assignment in reinforcement learning. University of Massachusetts Amherst."}],"event":{"name":"CODS-COMAD 2023: 6th Joint International Conference on Data Science & Management of Data (10th ACM IKDD CODS and 28th COMAD)","acronym":"CODS-COMAD 2023","location":"Mumbai India"},"container-title":["Proceedings of the 6th Joint International Conference on Data Science &amp; Management of Data (10th ACM IKDD CODS and 28th COMAD)"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3570991.3571031","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3570991.3571031","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:37:53Z","timestamp":1750178273000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3570991.3571031"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1,4]]},"references-count":16,"alternative-id":["10.1145\/3570991.3571031","10.1145\/3570991"],"URL":"https:\/\/doi.org\/10.1145\/3570991.3571031","relation":{},"subject":[],"published":{"date-parts":[[2023,1,4]]},"assertion":[{"value":"2023-01-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}