{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T03:51:35Z","timestamp":1769745095377,"version":"3.49.0"},"publisher-location":"Cham","reference-count":41,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031703676","type":"print"},{"value":"9783031703683","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70368-3_2","type":"book-chapter","created":{"date-parts":[[2024,8,30]],"date-time":"2024-08-30T23:03:11Z","timestamp":1725058991000},"page":"22-39","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Model-Based Reinforcement Learning with\u00a0Multi-task Offline Pretraining"],"prefix":"10.1007","author":[{"given":"Minting","family":"Pan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yitao","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yunbo","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaokang","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,8,22]]},"reference":[{"key":"2_CR1","unstructured":"Anand, A., Racah, E., Ozair, S., Bengio, Y., C\u00f4t\u00e9, M.A., Hjelm, R.D.: Unsupervised state representation learning in Atari. In: NeurIPS, vol.\u00a032 (2019)"},{"key":"2_CR2","unstructured":"Chen, L., et al.: Decision transformer: reinforcement learning via sequence modeling. In: NeurIPS, vol.\u00a034, pp. 15084\u201315097 (2021)"},{"key":"2_CR3","unstructured":"Cho, D., Shim, D., Kim, H.J.: S2p: state-conditioned image synthesis for data augmentation in offline reinforcement learning. In: NeurIPS (2022)"},{"key":"2_CR4","doi-asserted-by":"publisher","first-page":"104256","DOI":"10.1016\/j.robot.2022.104256","volume":"157","author":"R Choudhary","year":"2022","unstructured":"Choudhary, R., Walambe, R., Kotecha, K.: Spatial and temporal features unified self-supervised representation learning networks. Robot. Auton. Syst. 157, 104256 (2022)","journal-title":"Robot. Auton. Syst."},{"key":"2_CR5","unstructured":"Dosovitskiy, A., Ros, G., Codevilla, F., L\u00f3pez, A.M., Koltun, V.: CARLA: an open urban driving simulator. In: CoRL, vol.\u00a078, pp. 1\u201316 (2017)"},{"key":"2_CR6","doi-asserted-by":"crossref","unstructured":"Dwibedi, D., Tompson, J., Lynch, C., Sermanet, P.: Learning actionable representations from visual observations. In: IROS, pp. 1577\u20131584. IEEE (2018)","DOI":"10.1109\/IROS.2018.8593951"},{"key":"2_CR7","unstructured":"Ebert, F., Finn, C., Dasari, S., Xie, A., Lee, A., Levine, S.: Visual foresight: model-based deep reinforcement learning for vision-based robotic control. arXiv preprint arXiv:1812.00568 (2018)"},{"key":"2_CR8","unstructured":"Fujimoto, S., Meger, D., Precup, D.: Off-policy deep reinforcement learning without exploration. In: ICML, pp. 2052\u20132062. PMLR (2019)"},{"key":"2_CR9","unstructured":"Gelada, C., Kumar, S., Buckman, J., Nachum, O., Bellemare, M.G.: DeepMDP: learning continuous latent space models for representation learning. In: ICML, pp. 2170\u20132179. PMLR (2019)"},{"key":"2_CR10","unstructured":"Hafner, D., Lillicrap, T., Ba, J., Norouzi, M.: Dream to control: learning behaviors by latent imagination. In: ICLR (2020)"},{"key":"2_CR11","unstructured":"Hafner, D., et al.: Learning latent dynamics for planning from pixels. In: ICML, pp. 2555\u20132565. PMLR (2019)"},{"key":"2_CR12","unstructured":"Hafner, D., Lillicrap, T., Norouzi, M., Ba, J.: Mastering Atari with discrete world models. In: ICLR (2021)"},{"key":"2_CR13","unstructured":"Hafner, D., Pasukonis, J., Ba, J., Lillicrap, T.: Mastering diverse domains through world models. arXiv preprint arXiv:2301.04104 (2023)"},{"key":"2_CR14","unstructured":"Hansen, N., Su, H., Wang, X.: Td-mpc2: scalable, robust world models for continuous control. arXiv preprint arXiv:2310.16828 (2023)"},{"key":"2_CR15","doi-asserted-by":"crossref","unstructured":"Hester, T., et\u00a0al.: Deep q-learning from demonstrations. In: AAAI (2018)","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"2_CR16","doi-asserted-by":"publisher","first-page":"104425","DOI":"10.1016\/j.robot.2023.104425","volume":"165","author":"Y Kadokawa","year":"2023","unstructured":"Kadokawa, Y., Zhu, L., Tsurumine, Y., Matsubara, T.: Cyclic policy distillation: sample-efficient sim-to-real reinforcement learning with domain randomization. Robot. Auton. Syst. 165, 104425 (2023)","journal-title":"Robot. Auton. Syst."},{"key":"2_CR17","unstructured":"Kaiser, L., et\u00a0al.: Model-based reinforcement learning for Atari. In: ICLR (2019)"},{"key":"2_CR18","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"2_CR19","unstructured":"Laskin, M., Srinivas, A., Abbeel, P.: CURL: contrastive unsupervised representations for reinforcement learning. In: ICML, vol.\u00a0119, pp. 5639\u20135650. PMLR (2020)"},{"key":"2_CR20","unstructured":"Laskin, M., Lee, K., Stooke, A., Pinto, L., Abbeel, P., Srinivas, A.: Reinforcement learning with augmented data. In: NeurIPS, vol.\u00a033, pp. 19884\u201319895 (2020)"},{"key":"2_CR21","unstructured":"Li, D., Wang, S., Chen, K., Li, B.: Contrastive inductive bias controlling networks for reinforcement learning. In: ACML, pp. 563\u2013578. PMLR (2023)"},{"key":"2_CR22","unstructured":"Liu, I.J., Peng, J., Schwing, A.G.: Knowledge flow: improve upon your teachers. In: ICLR (2019)"},{"key":"2_CR23","unstructured":"Nair, S., Rajeswaran, A., Kumar, V., Finn, C., Gupta, A.: R3m: a universal visual representation for robot manipulation. arXiv preprint arXiv:2203.12601 (2022)"},{"key":"2_CR24","unstructured":"Pan, M., Zhu, X., Wang, Y., Yang, X.: Iso-dream: isolating and leveraging noncontrollable visual dynamics in world models. In: NeurIPS, vol.\u00a035, pp. 23178\u201323191 (2022)"},{"key":"2_CR25","unstructured":"Schwarzer, M., et al.: Pretraining representations for data-efficient reinforcement learning. In: NeurIPS, vol.\u00a034, pp. 12686\u201312699 (2021)"},{"key":"2_CR26","unstructured":"Sekar, R., Rybkin, O., Daniilidis, K., Abbeel, P., Hafner, D., Pathak, D.: Planning to explore via self-supervised world models. In: ICML, pp. 8583\u20138592 (2020)"},{"key":"2_CR27","unstructured":"Seo, Y., Lee, K., James, S.L., Abbeel, P.: Reinforcement learning with action-free pre-training from videos. In: ICML, pp. 19561\u201319579. PMLR (2022)"},{"key":"2_CR28","unstructured":"Sohn, K., Lee, H., Yan, X.: Learning structured output representation using deep conditional generative models. In: NeurIPS, vol.\u00a028 (2015)"},{"key":"2_CR29","unstructured":"Stooke, A., Lee, K., Abbeel, P., Laskin, M.: Decoupling representation learning from reinforcement learning. In: ICML, pp. 9870\u20139879. PMLR (2021)"},{"key":"2_CR30","unstructured":"Sun, Y., Ma, S., Madaan, R., Bonatti, R., Huang, F., Kapoor, A.: Smart: self-supervised multi-task pretraining with control transformers. In: ICLR (2023)"},{"key":"2_CR31","unstructured":"Taiga, A.A., Agarwal, R., Farebrother, J., Courville, A., Bellemare, M.G.: Investigating multi-task pretraining and generalization in reinforcement learning. In: ICLR (2023)"},{"key":"2_CR32","unstructured":"Tassa, Y., et\u00a0al.: Deepmind control suite. arXiv preprint arXiv:1801.00690 (2018)"},{"key":"2_CR33","unstructured":"Xie, Z., Lin, Z., Ye, D., Fu, Q., Wei, Y., Li, S.: Future-conditioned unsupervised pretraining for decision transformer. In: ICML, pp. 38187\u201338203. PMLR (2023)"},{"key":"2_CR34","unstructured":"Xu, Y., et al.: On the feasibility of cross-task transfer with model-based reinforcement learning. In: ICLR (2023)"},{"key":"2_CR35","unstructured":"Yang, H., et al.: Self-supervised representations for multi-view reinforcement learning. In: UAI (2022)"},{"key":"2_CR36","unstructured":"Yang, M., Nachum, O.: Representation matters: offline pretraining for sequential decision making. In: ICML, pp. 11784\u201311794. PMLR (2021)"},{"key":"2_CR37","unstructured":"Yao, Z., Wang, Y., Long, M., Wang, J.: Unsupervised transfer learning for spatiotemporal predictive networks. In: ICML, pp. 10778\u201310788. PMLR (2020)"},{"key":"2_CR38","doi-asserted-by":"crossref","unstructured":"Yarats, D., Zhang, A., Kostrikov, I., Amos, B., Pineau, J., Fergus, R.: Improving sample efficiency in model-free reinforcement learning from images. In: AAAI, pp. 10674\u201310681 (2021)","DOI":"10.1609\/aaai.v35i12.17276"},{"key":"2_CR39","unstructured":"Ye, W., Liu, S., Kurutach, T., Abbeel, P., Gao, Y.: Mastering Atari games with limited data. In: NeurIPS (2021)"},{"key":"2_CR40","unstructured":"Yu, T., et al.: Meta-world: a benchmark and evaluation for multi-task and meta reinforcement learning. In: CoRL, pp. 1094\u20131100. PMLR (2020)"},{"issue":"5","key":"2_CR41","doi-asserted-by":"publisher","first-page":"2890","DOI":"10.1109\/LRA.2023.3259681","volume":"8","author":"Y Ze","year":"2023","unstructured":"Ze, Y., Hansen, N., Chen, Y., Jain, M., Wang, X.: Visual reinforcement learning with self-supervised 3d representations. IEEE Robot. Autom. Lett. 8(5), 2890\u20132897 (2023)","journal-title":"IEEE Robot. Autom. Lett."}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70368-3_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,30]],"date-time":"2024-08-30T23:10:00Z","timestamp":1725059400000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70368-3_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031703676","9783031703683"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70368-3_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"22 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vilnius","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lithuania","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2024.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}