{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T21:30:35Z","timestamp":1743024635184,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":37,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819991181"},{"type":"electronic","value":"9789819991198"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-9119-8_26","type":"book-chapter","created":{"date-parts":[[2024,2,2]],"date-time":"2024-02-02T13:03:04Z","timestamp":1706878984000},"page":"285-297","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["How to\u00a0Select the\u00a0Appropriate One from\u00a0the\u00a0Trained Models for\u00a0Model-Based OPE"],"prefix":"10.1007","author":[{"given":"Chongchong","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yue","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhi-Ming","family":"Ma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuting","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,2,3]]},"reference":[{"key":"26_CR1","unstructured":"Agarwal, R., Schuurmans, D., Norouzi, M.: An optimistic perspective on offline reinforcement learning. In: ICML, November 2020"},{"key":"26_CR2","doi-asserted-by":"crossref","unstructured":"Bain, M., Sammut, C.: A framework for behavioural cloning. In: Machine Intelligence 15, Intelligent Agents, pp. 103\u2013129. Oxford University, GBR (1999)","DOI":"10.1093\/oso\/9780198538677.003.0006"},{"key":"26_CR3","unstructured":"Barth-Maron, G., et al.: Distributional policy gradients. In: ICLR (2018)"},{"key":"26_CR4","unstructured":"Chua, K., Calandra, R., McAllister, R., Levine, S.: Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In: NeurIPS (2018)"},{"key":"26_CR5","unstructured":"Dud\u00edk, M., Langford, J., Li, L.: Doubly robust policy evaluation and learning. In: ICML, pp. 1097\u20131104. Madison, WI, USA, June 2011"},{"key":"26_CR6","unstructured":"Fu, J., et al.: Benchmarks for deep off-policy evaluation. In: ICLR (2021)"},{"key":"26_CR7","unstructured":"Fujimoto, S., Meger, D., Precup, D.: Off-policy deep reinforcement learning without exploration. In: ICML, pp. 2052\u20132062, May 2019"},{"key":"26_CR8","unstructured":"Gulcehre, C., et al.: RL Unplugged: a suite of benchmarks for offline reinforcement learning. In: NeurIPS, vol. 33, pp. 7248\u20137259 (2020)"},{"key":"26_CR9","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: ICML, pp. 1861\u20131870, July 2018"},{"key":"26_CR10","unstructured":"Hallak, A., Schnitzler, F., Mann, T., Mannor, S.: Off-policy model-based learning under unknown factored dynamics. In: ICML, pp. 711\u2013719, June 2015"},{"key":"26_CR11","doi-asserted-by":"crossref","unstructured":"Hanna, J.P., Stone, P., Niekum, S.: Bootstrapping with models: confidence intervals for off-policy evaluation. In: AAAI, February 2017","DOI":"10.1609\/aaai.v31i1.11123"},{"key":"26_CR12","unstructured":"Janner, M., Fu, J., Zhang, M., Levine, S.: When to trust your model: model-based policy optimization. In: NeurIPS, vol. 32 (2019)"},{"key":"26_CR13","unstructured":"Kostrikov, I., Nachum, O.: Statistical Bootstrapping for Uncertainty Estimation in Off-Policy Evaluation. arXiv:2007.13609 [cs, stat], July 2020. arXiv: 2007.13609"},{"key":"26_CR14","unstructured":"Kumar, A., Fu, J., Soh, M., Tucker, G., Levine, S.: Stabilizing off-policy Q-learning via bootstrapping error reduction. In: NeurIPS, vol. 32 (2019)"},{"key":"26_CR15","unstructured":"K\u00e9gl, B., Hurtado, G., Thomas, A.: Model-based micro-data reinforcement learning: what are the crucial model properties and which model to choose? In: ICLR, September 2020"},{"key":"26_CR16","doi-asserted-by":"crossref","unstructured":"Lange, S., Gabel, T., Riedmiller, M.: Batch reinforcement learning. In: Reinforcement Learning: State-of-the-Art, pp. 45\u201373. Adaptation, Learning, and Optimization. Springer, Heidelberg (2012)","DOI":"10.1007\/978-3-642-27645-3_2"},{"key":"26_CR17","unstructured":"Levine, S., Kumar, A., Tucker, G., Fu, J.: Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems. arXiv:2005.01643 (Nov 2020)"},{"key":"26_CR18","doi-asserted-by":"crossref","unstructured":"Li, L., Chu, W., Langford, J., Wang, X.: Unbiased offline evaluation of contextual-bandit-based news article recommendation algorithms. In: WSDM, February 2011","DOI":"10.1145\/1935826.1935878"},{"key":"26_CR19","unstructured":"Li, L., Munos, R., Szepesvari, C.: On Minimax Optimal Offline Policy Evaluation. arXiv:1409.3653 [cs], September 2014. arXiv: 1409.3653"},{"key":"26_CR20","unstructured":"Mandel, T., Liu, Y.E., Levine, S., Brunskill, E., Popovic, Z.: Offline policy evaluation across representations with applications to educational games. In: AAMAS, May 2014"},{"key":"26_CR21","unstructured":"Mnih, V., et al.: Asynchronous Methods for Deep Reinforcement Learning. In: ICML, pp. 1928\u20131937, June 2016"},{"issue":"456","key":"26_CR22","doi-asserted-by":"publisher","first-page":"1410","DOI":"10.1198\/016214501753382327","volume":"96","author":"SA Murphy","year":"2001","unstructured":"Murphy, S.A., van der Laan, M.J., Robins, J.M.: Marginal mean models for dynamic regimes. J. Am. Stat. Assoc. 96(456), 1410\u20131423 (2001)","journal-title":"J. Am. Stat. Assoc."},{"key":"26_CR23","unstructured":"Nachum, O., Chow, Y., Dai, B., Li, L.: DualDICE: behavior-agnostic estimation of discounted stationary distribution corrections. In: NeurIPS, vol. 32 (2019)"},{"key":"26_CR24","unstructured":"Paine, T.L., et al.: Hyperparameter Selection for Offline Reinforcement Learning. arXiv:2007.09055 [cs, stat], July 2020"},{"key":"26_CR25","unstructured":"Precup, D., Sutton, R.S., Singh, S.P.: Eligibility traces for off-policy policy evaluation. In: ICML, pp. 759\u2013766. San Francisco, CA, USA, June 2000"},{"key":"26_CR26","unstructured":"Siegel, N., et al.: Keep doing what worked: Behavior modelling priors for offline reinforcement learning. In: ICLR (2020)"},{"issue":"7587","key":"26_CR27","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of Go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"key":"26_CR28","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)","DOI":"10.1109\/TNN.1998.712192"},{"key":"26_CR29","unstructured":"Tassa, Y., et al.: DeepMind Control Suite. arXiv:1801.00690 [cs], January 2018"},{"key":"26_CR30","unstructured":"Thomas, P., Brunskill, E.: Data-efficient off-policy policy evaluation for reinforcement learning. In: ICML, pp. 2139\u20132148, June 2016, iSSN: 1938\u20137228"},{"key":"26_CR31","doi-asserted-by":"crossref","unstructured":"Todorov, E., Erez, T., Tassa, Y.: MuJoCo: a physics engine for model-based control. In: IROS, pp. 5026\u20135033, October 2012","DOI":"10.1109\/IROS.2012.6386109"},{"key":"26_CR32","unstructured":"Uehara, M., Huang, J., Jiang, N.: Minimax weight and Q-function learning for off-policy evaluation. In: ICML, November 2020"},{"key":"26_CR33","unstructured":"Voloshin, C., Le, H.M., Jiang, N., Yue, Y.: Empirical Study of Off-Policy Policy Evaluation for Reinforcement Learning. arXiv:1911.06854 [cs, stat], November 2021"},{"key":"26_CR34","unstructured":"Wang, T., et al.: Benchmarking Model-Based Reinforcement Learning. arXiv:1907.02057 [cs, stat], July 2019. arXiv: 1907.02057"},{"key":"26_CR35","unstructured":"Wang, Z., et al.: Critic regularized regression. In: NeurIPS, vol. 33, pp. 7768\u20137778 (2020)"},{"key":"26_CR36","unstructured":"Yang, M., Nachum, O., Dai, B., Li, L., Schuurmans, D.: Off-policy evaluation via the regularized Lagrangian. In: NeurIPS, vol. 33, pp. 6551\u20136561 (2020)"},{"key":"26_CR37","unstructured":"Zhang, M.R., Paine, T., Nachum, O., Paduraru, C., Tucker, G., ziyu wang, Norouzi, M.: Autoregressive dynamics models for offline policy evaluation and optimization. In: ICLR (2021)"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-9119-8_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,2]],"date-time":"2024-02-02T13:08:25Z","timestamp":1706879305000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-9119-8_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819991181","9789819991198"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-9119-8_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"3 February 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CICAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"CAAI International Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Fuzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 July 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 July 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cicai2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/cicai.caai.cn\/#\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"376","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"101","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"16","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.9","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1.9","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}