{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T01:10:40Z","timestamp":1748221840430,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":43,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819984343"},{"type":"electronic","value":"9789819984350"}],"license":[{"start":{"date-parts":[[2023,12,24]],"date-time":"2023-12-24T00:00:00Z","timestamp":1703376000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,24]],"date-time":"2023-12-24T00:00:00Z","timestamp":1703376000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-8435-0_16","type":"book-chapter","created":{"date-parts":[[2023,12,23]],"date-time":"2023-12-23T08:02:17Z","timestamp":1703318537000},"page":"200-212","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Adaptable Conservative Q-Learning for\u00a0Offline Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Lyn","family":"Qiu","sequence":"first","affiliation":[]},{"given":"Xu","family":"Li","sequence":"additional","affiliation":[]},{"given":"Lenghan","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Mingming","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Junchi","family":"Yan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,12,24]]},"reference":[{"key":"16_CR1","unstructured":"Agarwal, A., Kakade, S., Yang, L.F.: Model-based reinforcement learning with a generative model is minimax optimal. 
In: Conference on Learning Theory, pp. 67\u201383. PMLR (2020)"},{"key":"16_CR2","unstructured":"Agarwal, R., Schuurmans, D., Norouzi, M.: An optimistic perspective on offline reinforcement learning. In: ICML, pp. 104\u2013114. PMLR (2020)"},{"key":"16_CR3","unstructured":"Ajay, A., Kumar, A., Agrawal, P., Levine, S., Nachum, O.: Opal: Offline primitive discovery for accelerating offline reinforcement learning. arXiv preprint arXiv:2010.13611 (2020)"},{"key":"16_CR4","first-page":"7436","volume":"34","author":"G An","year":"2021","unstructured":"An, G., Moon, S., Kim, J.H., Song, H.O.: Uncertainty-based offline reinforcement learning with diversified q-ensemble. Adv. Neural. Inf. Process. Syst. 34, 7436\u20137447 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"16_CR5","unstructured":"Azar, M.G., Osband, I., Munos, R.: Minimax regret bounds for reinforcement learning. In: International Conference on Machine Learning, pp. 263\u2013272. PMLR (2017)"},{"key":"16_CR6","first-page":"15084","volume":"34","author":"L Chen","year":"2021","unstructured":"Chen, L., et al.: Decision transformer: reinforcement learning via sequence modeling. Adv. Neural. Inf. Process. Syst. 34, 15084\u201315097 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"9","key":"16_CR7","doi-asserted-by":"publisher","first-page":"2419","DOI":"10.1007\/s10994-021-05961-4","volume":"110","author":"G Dulac-Arnold","year":"2021","unstructured":"Dulac-Arnold, G., et al.: Challenges of real-world reinforcement learning: definitions, benchmarks and analysis. Mach. Learn. 110(9), 2419\u20132468 (2021)","journal-title":"Mach. Learn."},{"key":"16_CR8","unstructured":"Fu, J., Kumar, A., Nachum, O., Tucker, G., Levine, S.: D4rl: datasets for deep data-driven reinforcement learning. 
arXiv preprint arXiv:2004.07219 (2020)"},{"key":"16_CR9","first-page":"20132","volume":"34","author":"S Fujimoto","year":"2021","unstructured":"Fujimoto, S., Gu, S.S.: A minimalist approach to offline reinforcement learning. Adv. Neural. Inf. Process. Syst. 34, 20132\u201320145 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"16_CR10","first-page":"1","volume":"2007","author":"JL Gui\u00f1\u00f3n","year":"2007","unstructured":"Gui\u00f1\u00f3n, J.L., Ortega, E., Garc\u00eda-Ant\u00f3n, J., P\u00e9rez-Herranz, V.: Moving average and savitzki-golay smoothing filters using mathcad. Papers ICEE 2007, 1\u20134 (2007)","journal-title":"Papers ICEE"},{"key":"16_CR11","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: ICML, pp. 1861\u20131870. PMLR (2018)"},{"key":"16_CR12","unstructured":"Jaques, N., et al.: Way off-policy batch deep reinforcement learning of implicit human preferences in dialog. arXiv preprint arXiv:1907.00456 (2019)"},{"key":"16_CR13","unstructured":"Jin, Y., Yang, Z., Wang, Z.: Is pessimism provably efficient for offline rl? In: International Conference on Machine Learning, pp. 5084\u20135096. PMLR (2021)"},{"key":"16_CR14","doi-asserted-by":"crossref","unstructured":"Kendall, A., et al.: Learning to drive in a day. In: ICRA, pp. 8248\u20138254. IEEE (2019)","DOI":"10.1109\/ICRA.2019.8793742"},{"key":"16_CR15","first-page":"21810","volume":"33","author":"R Kidambi","year":"2020","unstructured":"Kidambi, R., Rajeswaran, A., Netrapalli, P., Joachims, T.: Morel: Model-based offline reinforcement learning. Adv. Neural. Inf. Process. Syst. 33, 21810\u201321823 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"16_CR16","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. 
arXiv preprint arXiv:1412.6980 (2014)"},{"key":"16_CR17","unstructured":"Kostrikov, I., Nair, A., Levine, S.: Offline reinforcement learning with implicit q-learning. arXiv preprint arXiv:2110.06169 (2021)"},{"key":"16_CR18","unstructured":"Kumar, A., Zhou, A., Tucker, G., Levine, S.: Conservative q-learning for offline reinforcement learning https:\/\/arxiv.org\/abs\/2006.04779"},{"key":"16_CR19","first-page":"1179","volume":"33","author":"A Kumar","year":"2020","unstructured":"Kumar, A., Zhou, A., Tucker, G., Levine, S.: Conservative q-learning for offline reinforcement learning. Adv. Neural. Inf. Process. Syst. 33, 1179\u20131191 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"16_CR20","doi-asserted-by":"crossref","unstructured":"Lange, S., Gabel, T., Riedmiller, M.: Batch reinforcement learning. Reinforcement learning: State-of-the-art, pp. 45\u201373 (2012)","DOI":"10.1007\/978-3-642-27645-3_2"},{"key":"16_CR21","unstructured":"Leibo, J.Z., Zambaldi, V., Lanctot, M., Marecki, J., Graepel, T.: Multi-agent reinforcement learning in sequential social dilemmas. arXiv preprint arXiv:1702.03037 (2017)"},{"key":"16_CR22","unstructured":"Levine, S., Kumar, A., Tucker, G., Fu, J.: Offline reinforcement learning: tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643 (2020)"},{"key":"16_CR23","first-page":"12861","volume":"33","author":"G Li","year":"2020","unstructured":"Li, G., Wei, Y., Chi, Y., Gu, Y., Chen, Y.: Breaking the sample size barrier in model-based reinforcement learning with a generative model. Adv. Neural. Inf. Process. Syst. 33, 12861\u201312872 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"16_CR24","unstructured":"Liu, Y., Swaminathan, A., Agarwal, A., Brunskill, E.: Off-policy policy gradient with state distribution correction. 
arXiv preprint arXiv:1904.08473 (2019)"},{"key":"16_CR25","unstructured":"Lyu, J., Ma, X., Li, X., Lu, Z.: Mildly conservative q-learning for offline reinforcement learning. arXiv preprint arXiv:2206.04745 (2022)"},{"key":"16_CR26","first-page":"19235","volume":"34","author":"Y Ma","year":"2021","unstructured":"Ma, Y., Jayaraman, D., Bastani, O.: Conservative offline distributional reinforcement learning. Adv. Neural. Inf. Process. Syst. 34, 19235\u201319247 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"16_CR27","doi-asserted-by":"crossref","unstructured":"Munemasa, I., Tomomatsu, Y., Hayashi, K., Takagi, T.: Deep reinforcement learning for recommender systems. In: ICOIACT, pp. 226\u2013233. IEEE (2018)","DOI":"10.1109\/ICOIACT.2018.8350761"},{"key":"16_CR28","unstructured":"Nachum, O., Dai, B., Kostrikov, I., Chow, Y., Li, L., Schuurmans, D.: Algaedice: policy gradient from arbitrary experience. arXiv preprint arXiv:1912.02074 (2019)"},{"key":"16_CR29","unstructured":"O\u2019Donoghue, B., Osband, I., Munos, R., Mnih, V.: The uncertainty bellman equation and exploration. In: International Conference on Machine Learning, pp. 3836\u20133845 (2018)"},{"key":"16_CR30","unstructured":"Ovadia, Y., et al.: Can you trust your model\u2019s uncertainty? evaluating predictive uncertainty under dataset shift. In: Advances in Neural Information Processing Systems 32 (2019)"},{"key":"16_CR31","unstructured":"Paszke, A., et al.: Pytorch: an imperative style, high-performance deep learning library. In: Advances in Neural Information Processing Systems 32 (2019)"},{"key":"16_CR32","unstructured":"Precup, D., Sutton, R.S., Dasgupta, S.: Off-policy temporal-difference learning with function approximation. In: ICML, pp. 
417\u2013424 (2001)"},{"key":"16_CR33","first-page":"11702","volume":"34","author":"P Rashidinejad","year":"2021","unstructured":"Rashidinejad, P., Zhu, B., Ma, C., Jiao, J., Russell, S.: Bridging offline reinforcement learning and imitation learning: a tale of pessimism. Adv. Neural. Inf. Process. Syst. 34, 11702\u201311716 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"16_CR34","unstructured":"Sinha, S., Mandlekar, A., Garg, A.: S4rl: surprisingly simple self-supervision for offline reinforcement learning in robotics. In: Conference on Robot Learning, pp. 907\u2013917. PMLR (2022)"},{"issue":"1","key":"16_CR35","first-page":"2603","volume":"17","author":"RS Sutton","year":"2016","unstructured":"Sutton, R.S., Mahmood, A.R., White, M.: An emphatic approach to the problem of off-policy temporal-difference learning. J. Mach. Learn. Res. 17(1), 2603\u20132631 (2016)","journal-title":"J. Mach. Learn. Res."},{"key":"16_CR36","doi-asserted-by":"crossref","unstructured":"Todorov, E., Erez, T., Tassa, Y.: Mujoco: a physics engine for model-based control. In: 2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 5026\u20135033. IEEE (2012)","DOI":"10.1109\/IROS.2012.6386109"},{"key":"16_CR37","unstructured":"Vinyals, O., et al.: Starcraft ii: A new challenge for reinforcement learning. arXiv preprint arXiv:1708.04782 (2017)"},{"key":"16_CR38","unstructured":"Wu, K., et al.: Acql: an adaptive conservative q-learning framework for offline reinforcement learning (2022)"},{"key":"16_CR39","unstructured":"Wu, Y., Tucker, G., Nachum, O.: Behavior regularized offline reinforcement learning. arXiv preprint arXiv:1911.11361 (2019)"},{"key":"16_CR40","unstructured":"Wu, Y., et al.: Uncertainty weighted actor-critic for offline reinforcement learning. 
arXiv preprint arXiv:2105.08140 (2021)"},{"key":"16_CR41","first-page":"28954","volume":"34","author":"T Yu","year":"2021","unstructured":"Yu, T., Kumar, A., Rafailov, R., Rajeswaran, A., Levine, S., Finn, C.: Combo: Conservative offline model-based policy optimization. Adv. Neural. Inf. Process. Syst. 34, 28954\u201328967 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"16_CR42","first-page":"14129","volume":"33","author":"T Yu","year":"2020","unstructured":"Yu, T., et al.: Mopo: model-based offline policy optimization. Adv. Neural. Inf. Process. Syst. 33, 14129\u201314142 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"16_CR43","doi-asserted-by":"crossref","unstructured":"Zou, L., Xia, L., Ding, Z., Song, J., Liu, W., Yin, D.: Reinforcement learning to optimize long-term user engagement in recommender systems. In: SIGKDD, pp. 2810\u20132818 (2019)","DOI":"10.1145\/3292500.3330668"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-8435-0_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,23]],"date-time":"2023-12-23T08:11:46Z","timestamp":1703319106000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-8435-0_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,24]]},"ISBN":["9789819984343","9789819984350"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-8435-0_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023,12,24]]},"assertion":[{"value":"24 December 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter 
History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Xiamen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 October 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/prcv2023.xmu.edu.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information 
(provided by the conference organizers)"}},{"value":"1420","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"532","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"37% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,78","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,69","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}