{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T16:47:22Z","timestamp":1755794842298,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":58,"publisher":"ACM","funder":[{"name":"Provincial Key Research and Development Program of Shandong Province","award":["2024CXGC010108"],"award-info":[{"award-number":["2024CXGC010108"]}]},{"name":"Technology Innovation Guidance Program of Shandong Province","award":["YDZX2024088"],"award-info":[{"award-number":["YDZX2024088"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2024YFC3307300, 2022YFC3303004"],"award-info":[{"award-number":["2024YFC3307300, 2022YFC3303004"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Natural Science Foundation of China","award":["62202271, 62472261, 62372275, 62272274, T2293773"],"award-info":[{"award-number":["62202271, 62472261, 62372275, 62272274, T2293773"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100007129","name":"Natural Science Foundation of Shandong Province","doi-asserted-by":"publisher","award":["ZR2024QF203"],"award-info":[{"award-number":["ZR2024QF203"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100007129","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,3]]},"DOI":"10.1145\/3711896.3737066","type":"proceedings-article","created":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T21:05:41Z","timestamp":1754255141000},"page":"4002-4013","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Offline Trajectory Optimization for Offline Reinforcement Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-3011-5745","authenticated-orcid":false,"given":"Ziqi","family":"Zhao","sequence":"first","affiliation":[{"name":"Shandong University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9076-6565","authenticated-orcid":false,"given":"Zhaochun","family":"Ren","sequence":"additional","affiliation":[{"name":"Leiden University, Leiden, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-7508-0964","authenticated-orcid":false,"given":"Liu","family":"Yang","sequence":"additional","affiliation":[{"name":"Shandong University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5242-6716","authenticated-orcid":false,"given":"Yunsen","family":"Liang","sequence":"additional","affiliation":[{"name":"Shandong University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8452-9929","authenticated-orcid":false,"given":"Fajie","family":"Yuan","sequence":"additional","affiliation":[{"name":"Westlake University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2964-6422","authenticated-orcid":false,"given":"Pengjie","family":"Ren","sequence":"additional","affiliation":[{"name":"Shandong University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4592-4074","authenticated-orcid":false,"given":"Zhumin","family":"Chen","sequence":"additional","affiliation":[{"name":"Shandong University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0203-4610","authenticated-orcid":false,"given":"Jun","family":"Ma","sequence":"additional","affiliation":[{"name":"Shandong University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6116-9115","authenticated-orcid":false,"given":"Xin","family":"Xin","sequence":"additional","affiliation":[{"name":"Shandong University, Qingdao, China"}]}],"member":"320","published-online":{"date-parts":[[2025,8,3]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Uncertainty-based offline reinforcement learning with diversified q-ensemble. Advances in neural information processing systems 34","author":"An Gaon","year":"2021","unstructured":"Gaon An, Seungyong Moon, Jang-Hyun Kim, and Hyun Oh Song. 2021. Uncertainty-based offline reinforcement learning with diversified q-ensemble. Advances in neural information processing systems 34 (2021), 7436-7447."},{"key":"e_1_3_2_2_2_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877-1901."},{"key":"e_1_3_2_2_3_1","volume-title":"Transdreamer: Reinforcement learning with transformer world models. arXiv preprint arXiv:2202.09481","author":"Chen Chang","year":"2022","unstructured":"Chang Chen, Yi-Fu Wu, Jaesik Yoon, and Sungjin Ahn. 2022. Transdreamer: Reinforcement learning with transformer world models. arXiv preprint arXiv:2202.09481 (2022)."},{"key":"e_1_3_2_2_4_1","volume-title":"Decision transformer: Reinforcement learning via sequence modeling. Advances in neural information processing systems 34","author":"Chen Lili","year":"2021","unstructured":"Lili Chen, Kevin Lu, Aravind Rajeswaran, Kimin Lee, Aditya Grover, Misha Laskin, Pieter Abbeel, Aravind Srinivas, and Igor Mordatch. 2021. Decision transformer: Reinforcement learning via sequence modeling. Advances in neural information processing systems 34 (2021), 15084-15097."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3523227.3546758"},{"key":"e_1_3_2_2_6_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_2_7_1","volume-title":"D4rl: Datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219","author":"Fu Justin","year":"2020","unstructured":"Justin Fu, Aviral Kumar, Ofir Nachum, George Tucker, and Sergey Levine. 2020. D4rl: Datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219 (2020)."},{"key":"e_1_3_2_2_8_1","volume-title":"A minimalist approach to offline reinforcement learning. Advances in neural information processing systems 34","author":"Fujimoto Scott","year":"2021","unstructured":"Scott Fujimoto and Shixiang Shane Gu. 2021. A minimalist approach to offline reinforcement learning. Advances in neural information processing systems 34 (2021), 20132-20145."},{"key":"e_1_3_2_2_9_1","volume-title":"International conference on machine learning. PMLR, 1587-1596","author":"Fujimoto Scott","year":"2018","unstructured":"Scott Fujimoto, Herke Hoof, and David Meger. 2018. Addressing function approximation error in actor-critic methods. In International conference on machine learning. PMLR, 1587-1596."},{"key":"e_1_3_2_2_10_1","volume-title":"International conference on machine learning. PMLR","author":"Fujimoto Scott","year":"2019","unstructured":"Scott Fujimoto, David Meger, and Doina Precup. 2019. Off-policy deep reinforcement learning without exploration. In International conference on machine learning. PMLR, 2052-2062."},{"key":"e_1_3_2_2_11_1","volume-title":"Snapshot ensembles: Train 1, get m for free. arXiv preprint arXiv:1704.00109","author":"Huang Gao","year":"2017","unstructured":"Gao Huang, Yixuan Li, Geoff Pleiss, Zhuang Liu, John E Hopcroft, and Kilian Q Weinberger. 2017. Snapshot ensembles: Train 1, get m for free. arXiv preprint arXiv:1704.00109 (2017)."},{"key":"e_1_3_2_2_12_1","volume-title":"Offline reinforcement learning as one big sequence modeling problem. Advances in neural information processing systems 34","author":"Janner Michael","year":"2021","unstructured":"Michael Janner, Qiyang Li, and Sergey Levine. 2021. Offline reinforcement learning as one big sequence modeling problem. Advances in neural information processing systems 34 (2021), 1273-1286."},{"key":"e_1_3_2_2_13_1","volume-title":"Craig Ferguson, Agata Lapedriza, Noah Jones, Shixiang Gu, and Rosalind Picard.","author":"Jaques Natasha","year":"2019","unstructured":"Natasha Jaques, Asma Ghandeharioun, Judy Hanwen Shen, Craig Ferguson, Agata Lapedriza, Noah Jones, Shixiang Gu, and Rosalind Picard. 2019. Way off-policy batch deep reinforcement learning of implicit human preferences in dialog. arXiv preprint arXiv:1907.00456 (2019)."},{"key":"e_1_3_2_2_14_1","volume-title":"Asma Ghandeharioun, Craig Ferguson, Agata Lapedriza, Noah Jones, Shixiang Shane Gu, and Rosalind Picard.","author":"Jaques Natasha","year":"2020","unstructured":"Natasha Jaques, Judy Hanwen Shen, Asma Ghandeharioun, Craig Ferguson, Agata Lapedriza, Noah Jones, Shixiang Shane Gu, and Rosalind Picard. 2020. Human-centric dialog training via offline reinforcement learning. arXiv preprint arXiv:2010.05848 (2020)."},{"key":"e_1_3_2_2_15_1","unstructured":"Dmitry Kalashnikov Alex Irpan Peter Pastor Julian Ibarz Alexander Herzog Eric Jang Deirdre Quillen Ethan Holly Mrinal Kalakrishnan Vincent Vanhoucke and Sergey Levine. 2018. QT-Opt: Scalable Deep Reinforcement Learning for Vision-Based Robotic Manipulation. arXiv:1806.10293 [cs.LG]"},{"key":"e_1_3_2_2_16_1","volume-title":"Morel: Model-based offline reinforcement learning. Advances in neural information processing systems 33","author":"Kidambi Rahul","year":"2020","unstructured":"Rahul Kidambi, Aravind Rajeswaran, Praneeth Netrapalli, and Thorsten Joachims. 2020. Morel: Model-based offline reinforcement learning. Advances in neural information processing systems 33 (2020), 21810-21823."},{"key":"e_1_3_2_2_17_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_2_18_1","volume-title":"International Conference on Machine Learning. PMLR, 5774-5783","author":"Kostrikov Ilya","year":"2021","unstructured":"Ilya Kostrikov, Rob Fergus, Jonathan Tompson, and Ofir Nachum. 2021. Offline reinforcement learning with fisher divergence critic regularization. In International Conference on Machine Learning. PMLR, 5774-5783."},{"key":"e_1_3_2_2_19_1","volume-title":"Offline reinforcement learning with implicit q-learning. arXiv preprint arXiv:2110.06169","author":"Kostrikov Ilya","year":"2021","unstructured":"Ilya Kostrikov, Ashvin Nair, and Sergey Levine. 2021. Offline reinforcement learning with implicit q-learning. arXiv preprint arXiv:2110.06169 (2021)."},{"key":"e_1_3_2_2_20_1","volume-title":"Stabilizing off-policy q-learning via bootstrapping error reduction. Advances in Neural Information Processing Systems 32","author":"Kumar Aviral","year":"2019","unstructured":"Aviral Kumar, Justin Fu, Matthew Soh, George Tucker, and Sergey Levine. 2019. Stabilizing off-policy q-learning via bootstrapping error reduction. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_2_2_21_1","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar Aviral","year":"2020","unstructured":"Aviral Kumar, Aurick Zhou, George Tucker, and Sergey Levine. 2020. Conservative q-learning for offline reinforcement learning. Advances in Neural Information Processing Systems 33 (2020), 1179-1191.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_22_1","volume-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643","author":"Levine Sergey","year":"2020","unstructured":"Sergey Levine, Aviral Kumar, George Tucker, and Justin Fu. 2020. Offline reinforcement learning: Tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643 (2020)."},{"key":"e_1_3_2_2_23_1","volume-title":"DiffStitch: Boosting Offline Reinforcement Learning with Diffusion-based Trajectory Stitching. arXiv preprint arXiv:2402.02439","author":"Li Guanghe","year":"2024","unstructured":"Guanghe Li, Yixiang Shan, Zhengbang Zhu, Ting Long, and Weinan Zhang. 2024. DiffStitch: Boosting Offline Reinforcement Learning with Diffusion-based Trajectory Stitching. arXiv preprint arXiv:2402.02439 (2024)."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.2196\/18477"},{"key":"e_1_3_2_2_25_1","volume-title":"Off-policy policy gradient with state distribution correction. arXiv preprint arXiv:1904.08473","author":"Liu Yao","year":"2019","unstructured":"Yao Liu, Adith Swaminathan, Alekh Agarwal, and Emma Brunskill. 2019. Off-policy policy gradient with state distribution correction. arXiv preprint arXiv:1904.08473 (2019)."},{"key":"e_1_3_2_2_26_1","volume-title":"Plan online, learn offline: Efficient learning and exploration via model-based control. arXiv preprint arXiv:1811.01848","author":"Lowrey Kendall","year":"2018","unstructured":"Kendall Lowrey, Aravind Rajeswaran, Sham Kakade, Emanuel Todorov, and Igor Mordatch. 2018. Plan online, learn offline: Efficient learning and exploration via model-based control. arXiv preprint arXiv:1811.01848 (2018)."},{"key":"e_1_3_2_2_27_1","volume-title":"Yee Whye Teh, and Jack Parker-Holder","author":"Lu Cong","year":"2024","unstructured":"Cong Lu, Philip Ball, Yee Whye Teh, and Jack Parker-Holder. 2024. Synthetic experience replay. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_2_28_1","volume-title":"Revisiting design choices in offline model-based reinforcement learning. arXiv preprint arXiv:2110.04135","author":"Lu Cong","year":"2021","unstructured":"Cong Lu, Philip J Ball, Jack Parker-Holder, Michael A Osborne, and Stephen J Roberts. 2021. Revisiting design choices in offline model-based reinforcement learning. arXiv preprint arXiv:2110.04135 (2021)."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196935"},{"key":"e_1_3_2_2_30_1","unstructured":"Tatsuya Matsushima Hiroki Furuta Yutaka Matsuo Ofir Nachum and Shixiang Gu. 2020. Deployment-Efficient Reinforcement Learning via Model-Based Offline Optimization. arXiv:2006.03647 [cs.LG]"},{"key":"e_1_3_2_2_31_1","volume-title":"Transformers are sample-efficient world models. arXiv preprint arXiv:2209.00588","author":"Micheli Vincent","year":"2022","unstructured":"Vincent Micheli, Eloi Alonso, and Fran\u00e7ois Fleuret. 2022. Transformers are sample-efficient world models. arXiv preprint arXiv:2209.00588 (2022)."},{"key":"e_1_3_2_2_32_1","volume-title":"Algaedice: Policy gradient from arbitrary experience. arXiv preprint arXiv:1912.02074","author":"Nachum Ofir","year":"2019","unstructured":"Ofir Nachum, Bo Dai, Ilya Kostrikov, Yinlam Chow, Lihong Li, and Dale Schuurmans. 2019. Algaedice: Policy gradient from arbitrary experience. arXiv preprint arXiv:1912.02074 (2019)."},{"key":"e_1_3_2_2_33_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans Ilya Sutskever et al. 2018. Improving language understanding by generative pre-training. (2018)."},{"key":"e_1_3_2_2_34_1","unstructured":"Alec Radford JeffreyWu Rewon Child David Luan Dario Amodei Ilya Sutskever et al. 2019. Language models are unsupervised multitask learners. OpenAI blog 1 8 (2019) 9."},{"key":"e_1_3_2_2_35_1","unstructured":"Rafael Rafailov Tianhe Yu Aravind Rajeswaran and Chelsea Finn. 2021. Offline Reinforcement Learning from Images with Latent Space Models 1154-1168 pages."},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_2_37_1","volume-title":"Rambo-rl: Robust adversarial model-based offline reinforcement learning. Advances in neural information processing systems 35","author":"Rigter Marc","year":"2022","unstructured":"Marc Rigter, Bruno Lacerda, and Nick Hawes. 2022. Rambo-rl: Robust adversarial model-based offline reinforcement learning. Advances in neural information processing systems 35 (2022), 16082-16097."},{"key":"e_1_3_2_2_38_1","first-page":"27580","article-title":"Online and offline reinforcement learning by planning with a learned model","volume":"34","author":"Schrittwieser Julian","year":"2021","unstructured":"Julian Schrittwieser, Thomas Hubert, Amol Mandhane, Mohammadamin Barekatain, Ioannis Antonoglou, and David Silver. 2021. Online and offline reinforcement learning by planning with a learned model. Advances in Neural Information Processing Systems 34 (2021), 27580-27591.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_39_1","volume-title":"Felix Berkenkamp, Abbas Abdolmaleki, Michael Neunert, Thomas Lampe, Roland Hafner, Nicolas Heess, and Martin Riedmiller.","author":"Siegel Noah Y","year":"2020","unstructured":"Noah Y Siegel, Jost Tobias Springenberg, Felix Berkenkamp, Abbas Abdolmaleki, Michael Neunert, Thomas Lampe, Roland Hafner, Nicolas Heess, and Martin Riedmiller. 2020. Keep doing what worked: Behavioral modelling priors for offline reinforcement learning. arXiv preprint arXiv:2002.08396 (2020)."},{"key":"e_1_3_2_2_40_1","volume-title":"Cog: Connecting new skills to past experience with offline reinforcement learning. arXiv preprint arXiv:2010.14500","author":"Singh Avi","year":"2020","unstructured":"Avi Singh, Albert Yu, Jonathan Yang, Jesse Zhang, Aviral Kumar, and Sergey Levine. 2020. Cog: Connecting new skills to past experience with offline reinforcement learning. arXiv preprint arXiv:2010.14500 (2020)."},{"key":"e_1_3_2_2_41_1","volume-title":"Conference on Robot Learning. PMLR, 907-917","author":"Sinha Samarth","year":"2022","unstructured":"Samarth Sinha, Ajay Mandlekar, and Animesh Garg. 2022. S4rl: Surprisingly simple self-supervision for offline reinforcement learning in robotics. In Conference on Robot Learning. PMLR, 907-917."},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.5555\/2946645.3007026"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.5555\/2789272.2886805"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"crossref","unstructured":"Phillip Swazinna Steffen Udluft and Thomas Runkler. 2021. Overcoming Model Bias for Robust Offline Deep Reinforcement Learning. arXiv:2008.05533 [cs.LG]","DOI":"10.1016\/j.engappai.2021.104366"},{"key":"e_1_3_2_2_45_1","volume-title":"Leveraging Factored Action Spaces for Efficient Offline Reinforcement Learning in Healthcare. In Decision Awareness in Reinforcement Learning Workshop at ICML","author":"Tang Shengpu","year":"2022","unstructured":"Shengpu Tang, Maggie Makar, Michael Sjoding, Finale Doshi-Velez, and Jenna Wiens. 2022. Leveraging Factored Action Spaces for Efficient Offline Reinforcement Learning in Healthcare. In Decision Awareness in Reinforcement Learning Workshop at ICML 2022. https:\/\/openreview.net\/forum?id=wl_o_hilncS"},{"key":"e_1_3_2_2_46_1","unstructured":"Shengpu Tang and Jenna Wiens. 2021. Model Selection for Offline Reinforcement Learning: Practical Considerations for Healthcare Settings. arXiv:2107.11003 [cs.LG]"},{"key":"e_1_3_2_2_47_1","volume-title":"CORL: Research-oriented Deep Offline Reinforcement Learning Library. In 3rd Offline RL Workshop: Offline RL as a ''Launchpad''. https:\/\/openreview.net\/forum?id=SyAS49bBcv","author":"Tarasov Denis","year":"2022","unstructured":"Denis Tarasov, Alexander Nikulin, Dmitry Akimov, Vladislav Kurenkov, and Sergey Kolesnikov. 2022. CORL: Research-oriented Deep Offline Reinforcement Learning Library. In 3rd Offline RL Workshop: Offline RL as a ''Launchpad''. https:\/\/openreview.net\/forum?id=SyAS49bBcv"},{"key":"e_1_3_2_2_48_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_2_49_1","first-page":"34748","article-title":"Bootstrapped transformer for offline reinforcement learning","volume":"35","author":"Zhao Hanye","year":"2022","unstructured":"KerongWang, Hanye Zhao, Xufang Luo, Kan Ren,Weinan Zhang, and Dongsheng Li. 2022. Bootstrapped transformer for offline reinforcement learning. Advances in Neural Information Processing Systems 35 (2022), 34748-34761.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_50_1","volume-title":"Behavior regularized offline reinforcement learning. arXiv preprint arXiv:1911.11361","author":"Wu Yifan","year":"2019","unstructured":"Yifan Wu, George Tucker, and Ofir Nachum. 2019. Behavior regularized offline reinforcement learning. arXiv preprint arXiv:1911.11361 (2019)."},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i5.16579"},{"key":"e_1_3_2_2_52_1","volume-title":"Bellman-consistent pessimism for offline reinforcement learning. Advances in neural information processing systems 34","author":"Xie Tengyang","year":"2021","unstructured":"Tengyang Xie, Ching-An Cheng, Nan Jiang, Paul Mineiro, and Alekh Agarwal. 2021. Bellman-consistent pessimism for offline reinforcement learning. Advances in neural information processing systems 34 (2021), 6683-6694."},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00271"},{"key":"e_1_3_2_2_54_1","volume-title":"Combo: Conservative offline model-based policy optimization. Advances in neural information processing systems 34","author":"Yu Tianhe","year":"2021","unstructured":"Tianhe Yu, Aviral Kumar, Rafael Rafailov, Aravind Rajeswaran, Sergey Levine, and Chelsea Finn. 2021. Combo: Conservative offline model-based policy optimization. Advances in neural information processing systems 34 (2021), 28954-28967."},{"key":"e_1_3_2_2_55_1","first-page":"14129","article-title":"Mopo: Model-based offline policy optimization","volume":"33","author":"Yu Tianhe","year":"2020","unstructured":"Tianhe Yu, Garrett Thomas, Lantao Yu, Stefano Ermon, James Y Zou, Sergey Levine, Chelsea Finn, and Tengyu Ma. 2020. Mopo: Model-based offline policy optimization. Advances in Neural Information Processing Systems 33 (2020), 14129-14142.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"crossref","unstructured":"Xianyuan Zhan Xiangyu Zhu and Haoran Xu. 2022. Model-Based Offline Planning with Trajectory Pruning. arXiv:2105.07351 [cs.AI]","DOI":"10.24963\/ijcai.2022\/516"},{"key":"e_1_3_2_2_57_1","volume-title":"STORM: Efficient stochastic transformer based world models for reinforcement learning. Advances in Neural Information Processing Systems 36","author":"Zhang Weipu","year":"2024","unstructured":"Weipu Zhang, GangWang, Jian Sun, Yetian Yuan, and Gao Huang. 2024. STORM: Efficient stochastic transformer based world models for reinforcement learning. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3134702"}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Toronto ON Canada","acronym":"KDD '25"},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711896.3737066","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,16]],"date-time":"2025-08-16T14:33:14Z","timestamp":1755354794000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711896.3737066"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,3]]},"references-count":58,"alternative-id":["10.1145\/3711896.3737066","10.1145\/3711896"],"URL":"https:\/\/doi.org\/10.1145\/3711896.3737066","relation":{},"subject":[],"published":{"date-parts":[[2025,8,3]]},"assertion":[{"value":"2025-08-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}