{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T21:34:05Z","timestamp":1771104845768,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":52,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819787043","type":"print"},{"value":"9789819787050","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8705-0_21","type":"book-chapter","created":{"date-parts":[[2025,2,7]],"date-time":"2025-02-07T14:37:07Z","timestamp":1738939027000},"page":"310-324","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Towards Robust Policy: Enhancing Offline Reinforcement Learning with\u00a0Adversarial Attacks and\u00a0Defenses"],"prefix":"10.1007","author":[{"given":"Thanh","family":"Nguyen","sequence":"first","affiliation":[]},{"given":"Tung M.","family":"Luu","sequence":"additional","affiliation":[]},{"given":"Tri","family":"Ton","sequence":"additional","affiliation":[]},{"given":"Chang D.","family":"Yoo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,2,8]]},"reference":[{"key":"21_CR1","unstructured":"Agarwal, R., Schuurmans, D., Norouzi, M.: An optimistic perspective on offline reinforcement learning. In: International Conference on Machine Learning, pp. 104\u2013114. PMLR (2020)"},{"key":"21_CR2","first-page":"29304","volume":"34","author":"R Agarwal","year":"2021","unstructured":"Agarwal, R., Schwarzer, M., Castro, P.S., Courville, A.C., Bellemare, M.: Deep reinforcement learning at the edge of the statistical precipice. Adv. Neural. Inf. Process. Syst. 34, 29304\u201329320 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"21_CR3","first-page":"7436","volume":"34","author":"G An","year":"2021","unstructured":"An, G., Moon, S., Kim, J.H., Song, H.O.: Uncertainty-based offline reinforcement learning with diversified q-ensemble. Adv. Neural. Inf. Process. Syst. 34, 7436\u20137447 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"21_CR4","unstructured":"Bai, C., Wang, L., Yang, Z., Deng, Z., Garg, A., Liu, P., Wang, Z.: Pessimistic bootstrapping for uncertainty-driven offline reinforcement learning. In: ICLR (2022)"},{"key":"21_CR5","unstructured":"Brandfonbrener, D., Combes, R.T.D., Laroche, R.: Incorporating explicit uncertainty estimates into deep offline reinforcement learning. arXiv preprint arXiv:2206.01085 (2022)"},{"key":"21_CR6","first-page":"4933","volume":"34","author":"D Brandfonbrener","year":"2021","unstructured":"Brandfonbrener, D., Whitney, W., Ranganath, R., Bruna, J.: Offline RL without off-policy evaluation. Adv. Neural. Inf. Process. Syst. 34, 4933\u20134946 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"21_CR7","unstructured":"Chen, J., Jiang, N.: Information-theoretic considerations in batch reinforcement learning. In: International Conference on Machine Learning, pp. 1042\u20131051. PMLR (2019)"},{"key":"21_CR8","first-page":"18353","volume":"33","author":"X Chen","year":"2020","unstructured":"Chen, X., Zhou, Z., Wang, Z., Wang, C., Wu, Y., Ross, K.: Bail: best-action imitation learning for batch deep reinforcement learning. Adv. Neural. Inf. Process. Syst. 33, 18353\u201318363 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"21_CR9","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"21_CR10","unstructured":"Fu, J., Kumar, A., Nachum, O., Tucker, G., Levine, S.: D4rl: datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219 (2020)"},{"key":"21_CR11","first-page":"20132","volume":"34","author":"S Fujimoto","year":"2021","unstructured":"Fujimoto, S., Gu, S.S.: A minimalist approach to offline reinforcement learning. Adv. Neural. Inf. Process. Syst. 34, 20132\u201320145 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"21_CR12","unstructured":"Fujimoto, S., Meger, D., Precup, D.: Off-policy deep reinforcement learning without exploration. In: International Conference on Machine Learning, pp. 2052\u20132062 (2019)"},{"key":"21_CR13","unstructured":"Fujimoto, S., Van\u00a0Hoof, H., Meger, D.: Addressing function approximation error in actor-critic methods. In: Proceedings of International Conference on Machine Learning (ICML), pp. 1582\u20131591 (2018)"},{"key":"21_CR14","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"21_CR15","unstructured":"Huang, S., Papernot, N., Goodfellow, I., Duan, Y., Abbeel, P.: Adversarial attacks on neural network policies. arXiv preprint arXiv:1702.02284 (2017)"},{"key":"21_CR16","unstructured":"Jin, Y., Yang, Z., Wang, Z.: Is pessimism provably efficient for offline RL? In: International Conference on Machine Learning, pp. 5084\u20135096. PMLR (2021)"},{"key":"21_CR17","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"21_CR18","unstructured":"Kos, J., Song, D.: Delving into adversarial attacks on deep policies. arXiv preprint arXiv:1705.06452 (2017)"},{"key":"21_CR19","unstructured":"Kostrikov, I., Nair, A., Levine, S.: Offline reinforcement learning with implicit q-learning. In: ICLR (2022)"},{"issue":"6","key":"21_CR20","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. Commun. ACM 60(6), 84\u201390 (2017)","journal-title":"Commun. ACM"},{"key":"21_CR21","unstructured":"Kumar, A., Fu, J., Soh, M., Tucker, G., Levine, S.: Stabilizing off-policy q-learning via bootstrapping error reduction. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"21_CR22","first-page":"1179","volume":"33","author":"A Kumar","year":"2020","unstructured":"Kumar, A., Zhou, A., Tucker, G., Levine, S.: Conservative q-learning for offline reinforcement learning. Adv. Neural. Inf. Process. Syst. 33, 1179\u20131191 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"1","key":"21_CR23","first-page":"1334","volume":"17","author":"S Levine","year":"2016","unstructured":"Levine, S., Finn, C., Darrell, T., Abbeel, P.: End-to-end training of deep visuomotor policies. J. Mach. Learn. Res. 17(1), 1334\u20131373 (2016)","journal-title":"J. Mach. Learn. Res."},{"key":"21_CR24","unstructured":"Levine, S., Kumar, A., Tucker, G., Fu, J.: Offline reinforcement learning: tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643 (2020)"},{"key":"21_CR25","doi-asserted-by":"crossref","unstructured":"Lin, Y.C., Hong, Z.W., Liao, Y.H., Shih, M.L., Liu, M.Y., Sun, M.: Tactics of adversarial attack on deep reinforcement learning agents. arXiv preprint arXiv:1703.06748 (2017)","DOI":"10.24963\/ijcai.2017\/525"},{"key":"21_CR26","doi-asserted-by":"publisher","first-page":"64965","DOI":"10.1109\/ACCESS.2022.3182107","volume":"10","author":"TM Luu","year":"2022","unstructured":"Luu, T.M., Nguyen, T., Vu, T., Yoo, C.D.: Utilizing skipped frames in action repeats for improving sample efficiency in reinforcement learning. IEEE Access 10, 64965\u201364975 (2022)","journal-title":"IEEE Access"},{"issue":"17","key":"21_CR27","doi-asserted-by":"publisher","first-page":"6504","DOI":"10.3390\/s22176504","volume":"22","author":"TM Luu","year":"2022","unstructured":"Luu, T.M., Vu, T., Nguyen, T., Yoo, C.D.: Visual pretraining via contrastive predictive model for pixel-based reinforcement learning. Sensors 22(17), 6504 (2022)","journal-title":"Sensors"},{"key":"21_CR28","unstructured":"Madry, A., Makelov, A., Schmidt, L., Tsipras, D., Vladu, A.: Towards deep learning models resistant to adversarial attacks. arXiv preprint arXiv:1706.06083 (2017)"},{"issue":"7540","key":"21_CR29","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"21_CR30","unstructured":"Munos, R., Stepleton, T., Harutyunyan, A., Bellemare, M.: Safe and efficient off-policy reinforcement learning. In: Advances in Neural Information Processing Systems, vol. 29 (2016)"},{"key":"21_CR31","doi-asserted-by":"crossref","unstructured":"Nguyen, T., Luu, T., Pham, T., Rakhimkul, S., Yoo, C.D.: Robust maml: prioritization task buffer with adaptive learning process for model-agnostic meta-learning. In: ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 3460\u20133464. IEEE (2021)","DOI":"10.1109\/ICASSP39728.2021.9413446"},{"key":"21_CR32","unstructured":"Nguyen, T., Luu, T., Yoo, C.D.: Fast and memory-efficient uncertainty-aware framework for offline reinforcement learning with rank one mimo q network. In: IROS 2023 Workshop on Policy Learning in Geometric Spaces. IROS 2023 Workshop (2023)"},{"key":"21_CR33","doi-asserted-by":"crossref","unstructured":"Nguyen, T., Luu, T.M., Vu, T., Yoo, C.D.: Sample-efficient reinforcement learning representation learning with curiosity contrastive forward dynamics model. In: 2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 3471\u20133477. IEEE (2021)","DOI":"10.1109\/IROS51168.2021.9636536"},{"key":"21_CR34","doi-asserted-by":"publisher","first-page":"21534","DOI":"10.1109\/ACCESS.2023.3236087","volume":"11","author":"T Nguyen","year":"2023","unstructured":"Nguyen, T., Pham, T.X., Zhang, C., Luu, T.M., Vu, T., Yoo, C.D.: Dimcl: dimensional contrastive learning for improving self-supervised learning. IEEE Access 11, 21534\u201321545 (2023)","journal-title":"IEEE Access"},{"key":"21_CR35","unstructured":"Pattanaik, A., Tang, Z., Liu, S., Bommannan, G., Chowdhary, G.: Robust deep reinforcement learning with adversarial attacks. In: AAMAS (2018)"},{"key":"21_CR36","first-page":"11702","volume":"34","author":"P Rashidinejad","year":"2021","unstructured":"Rashidinejad, P., Zhu, B., Ma, C., Jiao, J., Russell, S.: Bridging offline reinforcement learning and imitation learning: a tale of pessimism. Adv. Neural. Inf. Process. Syst. 34, 11702\u201311716 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"21_CR37","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization. In: Proceedings of International Conference on Machine Learning (ICML), pp. 1889\u20131897 (2015)"},{"key":"21_CR38","unstructured":"Shen, Q., Li, Y., Jiang, H., Wang, Z., Zhao, T.: Deep reinforcement learning with robust and smooth policy. In: ICML, pp. 8707\u20138718 (2020)"},{"issue":"7587","key":"21_CR39","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"key":"21_CR40","unstructured":"Sun, Y., Zheng, R., Liang, Y., Huang, F.: Who is the strongest enemy? towards optimal and efficient evasion attacks in deep RL. In: ICLR (2022)"},{"key":"21_CR41","doi-asserted-by":"crossref","unstructured":"Torabi, F., Warnell, G., Stone, P.: Behavioral cloning from observation. arXiv preprint arXiv:1805.01954 (2018)","DOI":"10.24963\/ijcai.2018\/687"},{"key":"21_CR42","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"21_CR43","doi-asserted-by":"crossref","unstructured":"Vu, T., Kim, K., Luu, T.M., Nguyen, T., Yoo, C.D.: Softgroup for 3d instance segmentation on point clouds. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2708\u20132717 (2022)","DOI":"10.1109\/CVPR52688.2022.00273"},{"key":"21_CR44","doi-asserted-by":"crossref","unstructured":"Vu, T., Kim, K., Nguyen, T., Luu, T.M., Kim, J., Yoo, C.D.: Scalable softgroup for 3d instance segmentation on point clouds. IEEE Trans. Pattern Anal. Mach. Intell. (2023)","DOI":"10.1109\/CVPR52688.2022.00273"},{"key":"21_CR45","first-page":"7768","volume":"33","author":"Z Wang","year":"2020","unstructured":"Wang, Z., et al.: Critic regularized regression. Adv. Neural. Inf. Process. Syst. 33, 7768\u20137778 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"21_CR46","unstructured":"Wu, Y., Tucker, G., Nachum, O.: Behavior regularized offline reinforcement learning. arXiv preprint arXiv:1911.11361 (2019)"},{"key":"21_CR47","first-page":"6683","volume":"34","author":"T Xie","year":"2021","unstructured":"Xie, T., Cheng, C.A., Jiang, N., Mineiro, P., Agarwal, A.: Bellman-consistent pessimism for offline reinforcement learning. Adv. Neural. Inf. Process. Syst. 34, 6683\u20136694 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"21_CR48","first-page":"23851","volume":"35","author":"R Yang","year":"2022","unstructured":"Yang, R., Bai, C., Ma, X., Wang, Z., Zhang, C., Han, L.: Rorl: robust offline reinforcement learning via conservative smoothing. Adv. Neural. Inf. Process. Syst. 35, 23851\u201323866 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"21_CR49","unstructured":"Yin, M., Duan, Y., Wang, M., Wang, Y.X.: Near-optimal offline reinforcement learning with linear representation: leveraging variance information with pessimism. arXiv preprint arXiv:2203.05804 (2022)"},{"key":"21_CR50","unstructured":"Zhang, H., Chen, H., Boning, D., Hsieh, C.J.: Robust reinforcement learning on state observations with learned optimal adversary. In: ICLR (2021)"},{"key":"21_CR51","first-page":"21024","volume":"33","author":"H Zhang","year":"2020","unstructured":"Zhang, H., et al.: Robust deep reinforcement learning against adversarial perturbations on state observations. Adv. Neural. Inf. Process. Syst. 33, 21024\u201321037 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"21_CR52","unstructured":"Zhang, X., Chen, Y., Zhu, X., Sun, W.: Corruption-robust offline reinforcement learning. In: International Conference on Artificial Intelligence and Statistics, pp. 5757\u20135773. PMLR (2022)"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8705-0_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,7]],"date-time":"2025-02-07T14:37:41Z","timestamp":1738939061000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8705-0_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819787043","9789819787050"],"references-count":52,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8705-0_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"8 February 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPRAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition and Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Jeju Island","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 June 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 June 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icprai2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/brain.korea.ac.kr\/icprai2024\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}