{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:06:46Z","timestamp":1755907606700,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":24,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,2,2]],"date-time":"2024-02-02T00:00:00Z","timestamp":1706832000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,2,2]]},"DOI":"10.1145\/3651671.3651762","type":"proceedings-article","created":{"date-parts":[[2024,6,7]],"date-time":"2024-06-07T18:55:50Z","timestamp":1717786550000},"page":"498-505","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Soft Adversarial Offline Reinforcement Learning via Reducing the Attack Strength for Generalization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-8158-5248","authenticated-orcid":false,"given":"Wandi","family":"Qiao","sequence":"first","affiliation":[{"name":"MIRA Lab, University of Science and Technology of China, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-5137-9302","authenticated-orcid":false,"given":"Rui","family":"Yang","sequence":"additional","affiliation":[{"name":"MIRA Lab, University of Science and Technology of China, China"}]}],"member":"320","published-online":{"date-parts":[[2024,6,7]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Adversarial model for offline reinforcement learning. arXiv preprint arXiv:2302.11048","author":"Bhardwaj Mohak","year":"2023","unstructured":"Mohak Bhardwaj, Tengyang Xie, Byron Boots, Nan Jiang, and Ching-An Cheng. 2023. Adversarial model for offline reinforcement learning. arXiv preprint arXiv:2302.11048 (2023)."},{"key":"e_1_3_2_1_2_1","volume-title":"International Conference on Machine Learning. PMLR, 3852\u20133878","author":"Cheng Ching-An","year":"2022","unstructured":"Ching-An Cheng, Tengyang Xie, Nan Jiang, and Alekh Agarwal. 2022. Adversarially trained actor critic for offline reinforcement learning. In International Conference on Machine Learning. PMLR, 3852\u20133878."},{"key":"e_1_3_2_1_3_1","volume-title":"D4rl: Datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219","author":"Fu Justin","year":"2020","unstructured":"Justin Fu, Aviral Kumar, Ofir Nachum, George Tucker, and Sergey Levine. 2020. D4rl: Datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219 (2020)."},{"key":"e_1_3_2_1_4_1","volume-title":"A minimalist approach to offline reinforcement learning. Advances in neural information processing systems 34","author":"Fujimoto Scott","year":"2021","unstructured":"Scott Fujimoto and Shixiang\u00a0Shane Gu. 2021. A minimalist approach to offline reinforcement learning. Advances in neural information processing systems 34 (2021), 20132\u201320145."},{"key":"e_1_3_2_1_5_1","volume-title":"Off-Policy Deep Reinforcement Learning without Exploration. arXiv: Learning,arXiv: Learning (Dec","author":"Fujimoto Scott","year":"2018","unstructured":"Scott Fujimoto, David Meger, and Doina Precup. 2018. Off-Policy Deep Reinforcement Learning without Exploration. arXiv: Learning,arXiv: Learning (Dec 2018)."},{"key":"e_1_3_2_1_6_1","volume-title":"Explaining and harnessing adversarial examples. arXiv preprint arXiv:1412.6572","author":"Goodfellow J","year":"2014","unstructured":"Ian\u00a0J Goodfellow, Jonathon Shlens, and Christian Szegedy. 2014. Explaining and harnessing adversarial examples. arXiv preprint arXiv:1412.6572 (2014)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625"},{"key":"e_1_3_2_1_9_1","volume-title":"A survey of generalisation in deep reinforcement learning. arXiv e-prints","author":"Kirk Robert","year":"2021","unstructured":"Robert Kirk, Amy Zhang, Edward Grefenstette, and Tim Rockt\u00e4schel. 2021. A survey of generalisation in deep reinforcement learning. arXiv e-prints (2021), arXiv\u20132111."},{"key":"e_1_3_2_1_10_1","volume-title":"Offline reinforcement learning with implicit q-learning. arXiv preprint arXiv:2110.06169","author":"Kostrikov Ilya","year":"2021","unstructured":"Ilya Kostrikov, Ashvin Nair, and Sergey Levine. 2021. Offline reinforcement learning with implicit q-learning. arXiv preprint arXiv:2110.06169 (2021)."},{"key":"e_1_3_2_1_11_1","volume-title":"Stabilizing off-policy q-learning via bootstrapping error reduction. Advances in Neural Information Processing Systems 32","author":"Kumar Aviral","year":"2019","unstructured":"Aviral Kumar, Justin Fu, Matthew Soh, George Tucker, and Sergey Levine. 2019. Stabilizing off-policy q-learning via bootstrapping error reduction. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_2_1_12_1","volume-title":"Stabilizing Off-Policy Q-Learning via Bootstrapping Error Reduction. Neural Information Processing Systems,Neural Information Processing Systems (Jun","author":"Kumar Aviral","year":"2019","unstructured":"Aviral Kumar, Justin Fu, Matthew Soh, George Tucker, and Sergey Levine. 2019. Stabilizing Off-Policy Q-Learning via Bootstrapping Error Reduction. Neural Information Processing Systems,Neural Information Processing Systems (Jun 2019)."},{"key":"e_1_3_2_1_13_1","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar Aviral","year":"2020","unstructured":"Aviral Kumar, Aurick Zhou, George Tucker, and Sergey Levine. 2020. Conservative q-learning for offline reinforcement learning. Advances in Neural Information Processing Systems 33 (2020), 1179\u20131191.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_14_1","volume-title":"Controlling Overestimation Bias with Truncated Mixture of Continuous Distributional Quantile Critics. arXiv: Learning,arXiv: Learning (May","author":"Kuznetsov Arsenii","year":"2020","unstructured":"Arsenii Kuznetsov, Pavel Shvechikov, A.M. Grishin, and Dmitry Vetrov. 2020. Controlling Overestimation Bias with Truncated Mixture of Continuous Distributional Quantile Critics. arXiv: Learning,arXiv: Learning (May 2020)."},{"key":"e_1_3_2_1_15_1","volume-title":"Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems. arXiv: Learning,arXiv: Learning (May","author":"Levine Sergey","year":"2020","unstructured":"Sergey Levine, Aviral Kumar, George Tucker, and Justin Fu. 2020. Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems. arXiv: Learning,arXiv: Learning (May 2020)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-36108-1_12"},{"key":"e_1_3_2_1_17_1","unstructured":"Ashvin Nair Abhishek Gupta Murtaza Dalal and Sergey Levine. [n. d.]. AWAC: Accelerating Online Reinforcement Learning with Offline Datasets. ([n. d.])."},{"key":"e_1_3_2_1_18_1","volume-title":"Advantage-Weighted Regression: Simple and Scalable Off-Policy Reinforcement Learning","author":"Peng XueBin","year":"2021","unstructured":"XueBin Peng, Aviral Kumar, Grace Zhang, and Sergey Levine. 2021. Advantage-Weighted Regression: Simple and Scalable Off-Policy Reinforcement Learning. Cornell University - arXiv,Cornell University - arXiv (May 2021)."},{"key":"e_1_3_2_1_19_1","volume-title":"A survey on offline reinforcement learning: Taxonomy, review, and open problems","author":"Prudencio Rafael\u00a0Figueiredo","year":"2023","unstructured":"Rafael\u00a0Figueiredo Prudencio, Marcos\u00a0ROA Maximo, and Esther\u00a0Luna Colombini. 2023. A survey on offline reinforcement learning: Taxonomy, review, and open problems. IEEE Transactions on Neural Networks and Learning Systems (2023)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-09997-9"},{"key":"e_1_3_2_1_21_1","volume-title":"Reinforcement Learning for Education: Opportunities and Challenges.arXiv: Learning,arXiv: Learning (Jul","author":"Singla Adish","year":"2021","unstructured":"Adish Singla, AnnaN. Rafferty, Goran Radanovic, and NeilT. Heffernan. 2021. Reinforcement Learning for Education: Opportunities and Challenges.arXiv: Learning,arXiv: Learning (Jul 2021)."},{"key":"e_1_3_2_1_22_1","volume-title":"International Conference on Machine Learning. PMLR, 6215\u20136224","author":"Tessler Chen","year":"2019","unstructured":"Chen Tessler, Yonathan Efroni, and Shie Mannor. 2019. Action robust reinforcement learning and applications in continuous control. In International Conference on Machine Learning. PMLR, 6215\u20136224."},{"key":"e_1_3_2_1_23_1","first-page":"7768","article-title":"Critic regularized regression","volume":"33","author":"Wang Ziyu","year":"2020","unstructured":"Ziyu Wang, Alexander Novikov, Konrad Zolna, Josh\u00a0S Merel, Jost\u00a0Tobias Springenberg, Scott\u00a0E Reed, Bobak Shahriari, Noah Siegel, Caglar Gulcehre, Nicolas Heess, 2020. Critic regularized regression. Advances in Neural Information Processing Systems 33 (2020), 7768\u20137778.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_24_1","volume-title":"Model-based offline policy optimization with adversarial network. arXiv preprint arXiv:2309.02157","author":"Yang Junming","year":"2023","unstructured":"Junming Yang, Xingguo Chen, Shengyuan Wang, and Bolei Zhang. 2023. Model-based offline policy optimization with adversarial network. arXiv preprint arXiv:2309.02157 (2023)."}],"event":{"name":"ICMLC 2024: 2024 16th International Conference on Machine Learning and Computing","acronym":"ICMLC 2024","location":"Shenzhen China"},"container-title":["Proceedings of the 2024 16th International Conference on Machine Learning and Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3651671.3651762","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3651671.3651762","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T11:18:52Z","timestamp":1755861532000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3651671.3651762"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,2]]},"references-count":24,"alternative-id":["10.1145\/3651671.3651762","10.1145\/3651671"],"URL":"https:\/\/doi.org\/10.1145\/3651671.3651762","relation":{},"subject":[],"published":{"date-parts":[[2024,2,2]]},"assertion":[{"value":"2024-06-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}