{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T08:32:17Z","timestamp":1743064337316,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":21,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819755806"},{"type":"electronic","value":"9789819755813"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-5581-3_35","type":"book-chapter","created":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T19:02:53Z","timestamp":1722538973000},"page":"432-442","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Value Function Evaluation with Data Augmentation for Offline Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Xianwei","family":"Zhou","sequence":"first","affiliation":[]},{"given":"Chulue","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yifan","family":"Lin","sequence":"additional","affiliation":[]},{"given":"Songsen","family":"Yu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,1]]},"reference":[{"unstructured":"Levine, S., Kumar, A., Tucker, G., Fu, J.: Offline reinforcement learning: tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643 (2020)","key":"35_CR1"},{"doi-asserted-by":"crossref","unstructured":"Prudencio, R.F., Maximo, M.R., Colombini, E.L.: A survey on offline reinforcement learning: Taxonomy, review, and open problems. IEEE Trans. Neural Netw. Learn. Syst. (2023)","key":"35_CR2","DOI":"10.1109\/TNNLS.2023.3250269"},{"unstructured":"Fujimoto, S., Meger, D., Precup, D.: Off-policy deep reinforcement learning without exploration. In: International Conference on Machine Learning, pp. 2052\u20132062. PMLR (2019)","key":"35_CR3"},{"unstructured":"Kumar, A., Fu, J., Soh, M., Tucker, G., Levine, S.: Stabilizing off-policy q-learning via bootstrapping error reduction. Adv. Neural Inf. Process. Syst. 32 (2019)","key":"35_CR4"},{"unstructured":"Wu, Y., Tucker, G., Nachum, O.: Behavior regularized offline reinforcement learning. arXiv preprint arXiv:1911.11361 (2019)","key":"35_CR5"},{"key":"35_CR6","first-page":"1179","volume":"33","author":"A Kumar","year":"2020","unstructured":"Kumar, A., Zhou, A., Tucker, G., Levine, S.: Conservative q-learning for offline reinforcement learning. Adv. Neural. Inf. Process. Syst. 33, 1179\u20131191 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"unstructured":"Kostrikov, I., Nair, A., Levine, S.: Offline reinforcement learning with implicit q-learning. arXiv preprint arXiv:2110.06169 (2021)","key":"35_CR7"},{"unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)","key":"35_CR8"},{"unstructured":"Fu, J., Kumar, A., Nachum, O., Tucker, G., Levine, S.: D4RL: datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219 (2020)","key":"35_CR9"},{"unstructured":"Kostrikov, I., Fergus, R., Tompson, J., Nachum, O.: Offline reinforcement learning with fisher divergence critic regularization. In: International Conference on Machine Learning, pp. 5774\u20135783. PMLR (2021)","key":"35_CR10"},{"unstructured":"Peng, X.B., Kumar, A., Zhang, G., Levine, S.: Advantage-weighted regression: Simple and scalable off-policy reinforcement learning. arXiv preprint arXiv:1910.00177 (2019)","key":"35_CR11"},{"unstructured":"Ashvin, N., Murtaza, D., Abhishek, G., Sergey, L.: Accelerating online reinforcement learning with offline datasets. CoRR, vol. abs\/2006.09359 (2020)","key":"35_CR12"},{"key":"35_CR13","first-page":"20132","volume":"34","author":"S Fujimoto","year":"2021","unstructured":"Fujimoto, S., Gu, S.S.: A minimalist approach to offline reinforcement learning. Adv. Neural. Inf. Process. Syst. 34, 20132\u201320145 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning, pp. 1861\u20131870. PMLR (2018)","key":"35_CR14"},{"unstructured":"Nachum, O., Dai, B., Kostrikov, I., Chow, Y., Li, L., Schuurmans, D.: AlgaeDICE: policy gradient from arbitrary experience. arXiv preprint arXiv:1912.02074 (2019)","key":"35_CR15"},{"unstructured":"Agarwal, R., Schuurmans, D., Norouzi, M.: An optimistic perspective on offline reinforcement learning. In: International Conference on Machine Learning, pp. 104\u2013114. PMLR (2020)","key":"35_CR16"},{"key":"35_CR17","first-page":"4933","volume":"34","author":"D Brandfonbrener","year":"2021","unstructured":"Brandfonbrener, D., Whitney, W., Ranganath, R., Bruna, J.: Offline RL without off-policy evaluation. Adv. Neural. Inf. Process. Syst. 34, 4933\u20134946 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"35_CR18","first-page":"19884","volume":"33","author":"M Laskin","year":"2020","unstructured":"Laskin, M., Lee, K., Stooke, A., Pinto, L., Abbeel, P., Srinivas, A.: Reinforcement learning with augmented data. Adv. Neural. Inf. Process. Syst. 33, 19884\u201319895 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"doi-asserted-by":"crossref","unstructured":"Yuan, Z., et al.: Don\u2019t touch what matters: task-aware lipschitz data augmentation for visual reinforcement learning. arXiv preprint arXiv:2202.09982 (2022)","key":"35_CR19","DOI":"10.24963\/ijcai.2022\/514"},{"unstructured":"Sinha, S., Mandlekar, A., Garg, A.: S4RL: surprisingly simple self-supervision for offline reinforcement learning in robotics. In: Conference on Robot Learning, pp. 907\u2013917. PMLR (2022)","key":"35_CR20"},{"unstructured":"Weissenbacher, M., Sinha, S., Garg, A., Yoshinobu, K.: Koopman q-learning: offline reinforcement learning via symmetries of dynamics. In: International Conference on Machine Learning, pp. 23645\u201323667. PMLR (2022)","key":"35_CR21"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-5581-3_35","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T19:18:34Z","timestamp":1722539914000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-5581-3_35"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819755806","9789819755813"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-5581-3_35","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"1 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tianjin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 August 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/2024\/index.htm","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}