{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,9]],"date-time":"2025-10-09T00:18:44Z","timestamp":1759969124827,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,5,8]],"date-time":"2025-05-08T00:00:00Z","timestamp":1746662400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,5,8]]},"DOI":"10.1145\/3701716.3715224","type":"proceedings-article","created":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T16:20:01Z","timestamp":1748017201000},"page":"85-94","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["FAST-Q: Fast-track Exploration with Adversarially Balanced State Representations for Counterfactual Action Estimation in Offline Reinforcement Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-5786-8891","authenticated-orcid":false,"given":"Pulkit","family":"Agrawal","sequence":"first","affiliation":[{"name":"Games24x7, Bengaluru, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1551-9679","authenticated-orcid":false,"given":"Rukma","family":"Talwadker","sequence":"additional","affiliation":[{"name":"Games24x7, Bengaluru, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-9079-5940","authenticated-orcid":false,"given":"Aditya","family":"Pareek","sequence":"additional","affiliation":[{"name":"Games24x7, Bengaluru, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2385-1290","authenticated-orcid":false,"given":"Tridib","family":"Mukherjee","sequence":"additional","affiliation":[{"name":"Games24x7, Bengaluru, India"}]}],"member":"320","published-online":{"date-parts":[[2025,5,23]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3632410.3632455"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/304182.304187"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390162"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"crossref","unstructured":"Shai Ben-David John Blitzer Koby Crammer and Fernando Pereira. 2006. Analysis of Representations for Domain Adaptation. In NIPS.","DOI":"10.7551\/mitpress\/7503.003.0022"},{"key":"e_1_3_2_2_5_1","unstructured":"Ioana Bica Ahmed M. Alaa James Jordon and Mihaela van der Schaar. 2020. Estimating Counterfactual Treatment Outcomes over Time Through Adversarially Balanced Representations. arXiv:2002.04083 [cs.LG] https:\/\/arxiv.org\/abs\/2002. 04083"},{"key":"e_1_3_2_2_6_1","unstructured":"Greg Brockman Vicki Cheung Ludwig Pettersson Jonas Schneider John Schulman Jie Tang and Wojciech Zaremba. 2016. OpenAI Gym. arXiv:arXiv:1606.01540"},{"key":"e_1_3_2_2_7_1","volume-title":"Huang","author":"Chang Shiyu","year":"2017","unstructured":"Shiyu Chang, Yang Zhang, Jiliang Tang, Dawei Yin, Yi Chang, Mark A. Hasegawa- Johnson, and Thomas S. Huang. 2017. Streaming Recommender Systems. In WWW."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403316"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-47436-2_33"},{"key":"e_1_3_2_2_10_1","unstructured":"Justin Fu Aviral Kumar Ofir Nachum George Tucker and Sergey Levine. 2021. D4RL: Datasets for Deep Data-Driven Reinforcement Learning. arXiv:2004.07219 [cs.LG] https:\/\/arxiv.org\/abs\/2004.07219"},{"key":"e_1_3_2_2_11_1","unstructured":"Scott Fujimoto. 2021. TD3BC Repository. https:\/\/github.com\/sfujim\/TD3_BC"},{"key":"e_1_3_2_2_12_1","unstructured":"Scott Fujimoto. 2023. Diffusion-QL git Repo. https:\/\/github.com\/Zhendong- Wang\/Diffusion-Policies-for-Offline-RL"},{"key":"e_1_3_2_2_13_1","volume-title":"Proceedings of the 35th International Conference on Neural Information Processing Systems (NIPS '21)","author":"Fujimoto Scott","year":"2024","unstructured":"Scott Fujimoto and Shixiang Shane Gu. 2024. A minimalist approach to offline reinforcement learning. In Proceedings of the 35th International Conference on Neural Information Processing Systems (NIPS '21). Article 1540, 14 pages."},{"key":"e_1_3_2_2_14_1","volume-title":"International conference on machine learning, ICML. 1587--1596","author":"Fujimoto Scott","year":"2018","unstructured":"Scott Fujimoto, Herke Hoof, and David Meger. 2018. Addressing function approximation error in actor-critic methods. In International conference on machine learning, ICML. 1587--1596."},{"key":"e_1_3_2_2_15_1","unstructured":"Scott Fujimoto David Meger and Doina Precup. 2019. Off-Policy Deep Reinforcement Learning without Exploration. arXiv:1812.02900 [cs.LG] https: \/\/arxiv.org\/abs\/1812.02900"},{"key":"e_1_3_2_2_16_1","unstructured":"Yarin Gal and Zoubin Ghahramani. 2016. A Theoretically Grounded Application of Dropout in Recurrent Neural Networks."},{"key":"e_1_3_2_2_17_1","unstructured":"Games24x7. 2024. FAST-Q GIT Repo. https:\/\/github.com\/scarce-user-53\/Fast-Q"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"crossref","unstructured":"Yaroslav Ganin Evgeniya Ustinova Hana Ajakan Pascal Germain Hugo Larochelle Fran\u00e7ois Laviolette Mario Marchand and Victor Lempitsky. 2016. Domain-Adversarial Training of Neural Networks. arXiv:1505.07818 [stat.ML] https:\/\/arxiv.org\/abs\/1505.07818","DOI":"10.1007\/978-3-319-58347-1_10"},{"key":"e_1_3_2_2_19_1","volume-title":"Long short-term memory. Neural computation","author":"Hochreiter Sepp","year":"1997","unstructured":"Sepp Hochreiter and J\u00fcrgen Schmidhuber. 1997. Long short-term memory. Neural computation (1997)."},{"key":"e_1_3_2_2_20_1","volume-title":"Kingma and Jimmy Ba","author":"Diederik","year":"2017","unstructured":"Diederik P. Kingma and Jimmy Ba. 2017. Adam: A Method for Stochastic Optimization. arXiv:1412.6980 [cs.LG] https:\/\/arxiv.org\/abs\/1412.6980"},{"key":"e_1_3_2_2_21_1","unstructured":"Ilya Kostrikov Ashvin Nair and Sergey Levine. 2021. Offline Reinforcement Learning with Implicit Q-Learning. arXiv:2110.06169 [cs.LG] https:\/\/arxiv.org\/ abs\/2110.06169"},{"key":"e_1_3_2_2_22_1","unstructured":"Aviral Kumar Rishabh Agarwal Tengyu Ma Aaron Courville George Tucker and Sergey Levine. 2021. DR3: Value-Based Deep Reinforcement Learning Requires Explicit Regularization. arXiv:2112.04716 [cs.LG] https:\/\/arxiv.org\/abs\/2112. 04716"},{"key":"e_1_3_2_2_23_1","unstructured":"Aviral Kumar Justin Fu George Tucker and Sergey Levine. 2019. Stabilizing off-policy Q-learning via bootstrapping error reduction. In NIPS."},{"key":"e_1_3_2_2_24_1","unstructured":"Aviral Kumar Aurick Zhou George Tucker and Sergey Levine. 2020. Conservative Q-Learning for Offline Reinforcement Learning. arXiv:2006.04779 [cs.LG] https:\/\/arxiv.org\/abs\/2006.04779"},{"key":"e_1_3_2_2_25_1","volume-title":"Riedmiller","author":"Lange Sascha","year":"2012","unstructured":"Sascha Lange, Thomas Gabel, and Martin A. Riedmiller. 2012. Batch Reinforcement Learning. In Reinforcement Learning. https:\/\/api.semanticscholar.org\/ CorpusID:18760634"},{"key":"e_1_3_2_2_26_1","volume-title":"Neural Attentive Session-based Recommendation. In CIKM","author":"Li Jing","year":"2017","unstructured":"Jing Li, Pengjie Ren, Zhumin Chen, Zhaochun Ren, and Jun Ma. 2017. Neural Attentive Session-based Recommendation. In CIKM 2017."},{"key":"e_1_3_2_2_27_1","volume-title":"On a test of whether one of two random variables is stochastically larger than the other. The annals of mathematical statistics","author":"Mann Henry B","year":"1947","unstructured":"Henry B Mann and Donald R Whitney. 1947. On a test of whether one of two random variables is stochastically larger than the other. The annals of mathematical statistics (1947)."},{"key":"e_1_3_2_2_28_1","volume-title":"UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction. arXiv:1802.03426 [stat.ML] https:\/\/arxiv.org\/abs\/1802.03426","author":"McInnes Leland","year":"2020","unstructured":"Leland McInnes, John Healy, and James Melville. 2020. UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction. arXiv:1802.03426 [stat.ML] https:\/\/arxiv.org\/abs\/1802.03426"},{"key":"e_1_3_2_2_29_1","unstructured":"Andriy Mnih and Russ R Salakhutdinov. 2007. Probabilistic Matrix Factorization. In NIPS."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"crossref","unstructured":"Sriraam Natarajan and Prasad Tadepalli. 2005. Dynamic preferences in multicriteria reinforcement learning (ICML '05).","DOI":"10.1145\/1102351.1102427"},{"key":"e_1_3_2_2_31_1","volume-title":"Hern\u00e1n","author":"Robins James M.","year":"2008","unstructured":"James M. Robins and Miguel A. Hern\u00e1n. 2008. Estimation of the causal effects of time-varying exposures. https:\/\/api.semanticscholar.org\/CorpusID:268324361"},{"key":"e_1_3_2_2_32_1","first-page":"i","volume":"201","author":"Schoenauer-Sebag Alice","unstructured":"Alice Schoenauer-Sebag, Louise Heinrich, Marc Schoenauer, Michele Sebag, Lani F. Wu, and Steve J. Altschuler. 2019. Multi-Domain Adversarial Learning.","journal-title":"Steve J. Altschuler."},{"key":"e_1_3_2_2_33_1","unstructured":"Peter Schulam and Suchi Saria. 2018. Reliable Decision Support using Counterfactual Models. arXiv:1703.10651 [stat.ML] https:\/\/arxiv.org\/abs\/1703.10651"},{"key":"e_1_3_2_2_34_1","volume-title":"NIPS","author":"Sohn Kihyuk","year":"2015","unstructured":"Kihyuk Sohn, Xinchen Yan, and Honglak Lee. [n. d.]. Learning structured output representation using deep conditional generative models. In NIPS 2015."},{"key":"e_1_3_2_2_35_1","unstructured":"Hossein Soleimani Adarsh Subbaswamy and Suchi Saria. 2017. Treatment- Response Models for Counterfactual Reasoning with Continuous-time Continuous-valued Interventions. https:\/\/arxiv.org\/abs\/1704.02038"},{"volume-title":"Reinforcement learning: An introduction","author":"Sutton Richard S","key":"e_1_3_2_2_36_1","unstructured":"Richard S Sutton and Andrew G Barto. 1998. Reinforcement learning: An introduction, volume 1. MIT press Cambridge."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539179"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"e_1_3_2_2_39_1","unstructured":"Zhendong Wang Jonathan J Hunt and Mingyuan Zhou. 2023. Diffusion Policies as an Expressive Policy Class for Offline Reinforcement Learning. arXiv:2208.06193 [cs.LG] https:\/\/arxiv.org\/abs\/2208.06193"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159710"},{"key":"e_1_3_2_2_41_1","volume-title":"The Free Encyclopedia. https:\/\/en.wikipedia.org\/w\/index.php?title=Rummy&oldid=1194246527. [Online","author":"Wikipedia Wikipedia","year":"2024","unstructured":"Wikipedia contributors. 2024. Rummy - Wikipedia, The Free Encyclopedia. https:\/\/en.wikipedia.org\/w\/index.php?title=Rummy&oldid=1194246527. [Online; accessed 10-November-2024]."},{"key":"e_1_3_2_2_42_1","unstructured":"Runzhe Yang Xingyuan Sun and Karthik Narasimhan. 2019. A generalized algorithm for multi-objective reinforcement learning and policy adaptation."},{"key":"e_1_3_2_2_43_1","unstructured":"Runzhe Yang Xingyuan Sun and Karthik Narasimhan. 2019. A Generalized Algorithm for Multi-Objective Reinforcement Learning and Policy Adaptation. In NIPS."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"crossref","unstructured":"Lixin Zou Long Xia Zhuoye Ding Jiaxing Song Weidong Liu and Dawei Yin. 2019. Reinforcement Learning to Optimize Long-term User Engagement in Recommender Systems. arXiv:1902.05570 [cs.IR] https:\/\/arxiv.org\/abs\/1902.05570","DOI":"10.1145\/3292500.3330668"}],"event":{"name":"WWW '25: The ACM Web Conference 2025","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Sydney NSW Australia","acronym":"WWW '25"},"container-title":["Companion Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701716.3715224","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3701716.3715224","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T03:04:49Z","timestamp":1759892689000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701716.3715224"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,8]]},"references-count":44,"alternative-id":["10.1145\/3701716.3715224","10.1145\/3701716"],"URL":"https:\/\/doi.org\/10.1145\/3701716.3715224","relation":{},"subject":[],"published":{"date-parts":[[2025,5,8]]},"assertion":[{"value":"2025-05-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}