{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T11:21:18Z","timestamp":1768994478175,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":39,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819556922","type":"print"},{"value":"9789819556939","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5693-9_1","type":"book-chapter","created":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T21:22:58Z","timestamp":1768944178000},"page":"3-16","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Diffusion Dynamic Model for\u00a0Unsupervised Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Ran","family":"Chen","sequence":"first","affiliation":[]},{"given":"Xiaoliang","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Zhen","family":"Cui","sequence":"additional","affiliation":[]},{"given":"Luying","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Tong","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,21]]},"reference":[{"key":"1_CR1","unstructured":"Achiam, J., Held, D., Tamar, A., Abbeel, P.: Constrained policy optimization. In: International Conference on Machine Learning, pp. 22\u201331. PMLR (2017)"},{"key":"1_CR2","doi-asserted-by":"publisher","first-page":"58757","DOI":"10.52202\/079017-1873","volume":"37","author":"E Alonso","year":"2024","unstructured":"Alonso, E., Jelley, A., Micheli, V., Kanervisto, A., Storkey, A.J., Pearce, T., Fleuret, F.: Diffusion for world modeling: visual details matter in atari. Adv. Neural. Inf. Process. Syst. 37, 58757\u201358791 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1_CR3","unstructured":"Asadi, K., Misra, D., S, K., Littman, M.: Combating the compounding-error problem with a multi-step model. arXiv: Learning (2019)"},{"key":"1_CR4","unstructured":"Bellemare, M., Srinivasan, S., Ostrovski, G., Schaul, T., Saxton, D., Munos, R.: Unifying count-based exploration and intrinsic motivation. Adv. Neural Inf. Process. Syst. 29 (2016)"},{"key":"1_CR5","unstructured":"Burda, Y., Edwards, H., Pathak, D., Storkey, A., Darrell, T., Efros, A.A.: Large-scale study of curiosity-driven learning. arXiv preprint arXiv:1808.04355 (2018)"},{"key":"1_CR6","unstructured":"Campos, V., et al.: Beyond fine-tuning: transferring behavior in reinforcement learning. arXiv: Learning (2021)"},{"issue":"6","key":"1_CR7","first-page":"1","volume":"42","author":"X Chen","year":"2024","unstructured":"Chen, X., Wang, S., McAuley, J., Jannach, D., Yao, L.: On the opportunities and challenges of offline reinforcement learning for recommender systems. ACM Trans. Inf. Syst. 42(6), 1\u201326 (2024)","journal-title":"ACM Trans. Inf. Syst."},{"key":"1_CR8","unstructured":"Eysenbach, B., Gupta, A., Ibarz, J., Levine, S.: Diversity is all you need: learning skills without a reward function. Cornell University - arXiv (2018)"},{"key":"1_CR9","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning, pp. 1861\u20131870. PMLR (2018)"},{"key":"1_CR10","unstructured":"Hafner, D., Lillicrap, T., Ba, J., Norouzi, M.: Dream to control: learning behaviors by latent imagination. Learning (2019)"},{"key":"1_CR11","unstructured":"Hafner, D., et al.: Learning latent dynamics for planning from pixels. arXiv: Learning (2018)"},{"key":"1_CR12","unstructured":"Hansen, S., Dabney, W., Barreto, A., Wiele, T., Warde-Farley, D., Mnih, V.: Fast task inference with variational intrinsic successor features. Learning (2019)"},{"key":"1_CR13","unstructured":"Hansen-Estruch, P., Kostrikov, I., Janner, M., Kuba, J.G., Levine, S.: Idql: Implicit q-learning as an actor-critic method with diffusion policies. arXiv preprint arXiv:2304.10573 (2023)"},{"key":"1_CR14","first-page":"64896","volume":"36","author":"H He","year":"2023","unstructured":"He, H., et al.: Diffusion model is an effective planner and data synthesizer for multi-task reinforcement learning. Adv. Neural. Inf. Process. Syst. 36, 64896\u201364917 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1_CR15","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1_CR16","first-page":"67195","volume":"36","author":"B Kang","year":"2023","unstructured":"Kang, B., Ma, X., Du, C., Pang, T., Yan, S.: Efficient diffusion policies for offline reinforcement learning. Adv. Neural. Inf. Process. Syst. 36, 67195\u201367212 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1_CR17","first-page":"21810","volume":"33","author":"R Kidambi","year":"2020","unstructured":"Kidambi, R., Rajeswaran, A., Netrapalli, P., Joachims, T.: Morel: model-based offline reinforcement learning. Adv. Neural. Inf. Process. Syst. 33, 21810\u201321823 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1_CR18","unstructured":"Lambert, N., Pister, K., Calandra, R.: Investigating compounding prediction errors in learned dynamics models. arXiv preprint arXiv:2203.09637 (2022)"},{"key":"1_CR19","unstructured":"Laskin, M., et al.: Urlb: unsupervised reinforcement learning benchmark. arXiv preprint arXiv:2110.15191 (2021)"},{"key":"1_CR20","unstructured":"Liu, H., Abbeel, P.: Aps: active pretraining with successor features. In: International Conference on Machine Learning (2021)"},{"key":"1_CR21","first-page":"18459","volume":"34","author":"H Liu","year":"2021","unstructured":"Liu, H., Abbeel, P.: Behavior from the void: Unsupervised active pre-training. Adv. Neural. Inf. Process. Syst. 34, 18459\u201318473 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1_CR22","first-page":"46323","volume":"36","author":"C Lu","year":"2023","unstructured":"Lu, C., Ball, P., Teh, Y.W., Parker-Holder, J.: Synthetic experience replay. Adv. Neural. Inf. Process. Syst. 36, 46323\u201346344 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1_CR23","unstructured":"Lu, C., Ball, P.J., Parker-Holder, J., Osborne, M.A., Roberts, S.J.: Revisiting design choices in offline model-based reinforcement learning. arXiv preprint arXiv:2110.04135 (2021)"},{"key":"1_CR24","doi-asserted-by":"crossref","unstructured":"Pathak, D., Agrawal, P., Efros, A.A., Darrell, T.: Curiosity-driven exploration by self-supervised prediction. In: International Conference on Machine Learning, pp. 2778\u20132787. PMLR (2017)","DOI":"10.1109\/CVPRW.2017.70"},{"key":"1_CR25","unstructured":"Rajeswar, S., et al.: Mastering the unsupervised reinforcement learning benchmark from pixels. In: International Conference on Machine Learning, pp. 28598\u201328617. PMLR (2023)"},{"key":"1_CR26","unstructured":"Seo, Y., Chen, L., Shin, J., Lee, H., Abbeel, P., Lee, K.: State entropy maximization with random encoders for efficient exploration. Cornell University - arXiv (2021)"},{"key":"1_CR27","unstructured":"Sharma, A., Gu, S., Levine, S., Kumar, V., Hausman, K.: Dynamics-aware unsupervised discovery of skills. arXiv: Learning (2019)"},{"key":"1_CR28","unstructured":"Silver, D., Lever, G., Heess, N., Degris, T., Wierstra, D., Riedmiller, M.: Deterministic policy gradient algorithms. In: International Conference on Machine Learning, pp. 387\u2013395. PMLR (2014)"},{"key":"1_CR29","unstructured":"Song, Y., Ermon, S.: Generative modeling by estimating gradients of the data distribution. In: Proceedings of the 33rd International Conference on Neural Information Processing Systems. vol.\u00a032, pp. 11918\u201311930 (2019)"},{"key":"1_CR30","unstructured":"Srinivas, A., Laskin, M., Abbeel, P.: Curl: contrastive unsupervised representations for reinforcement learning. arXiv: Learning (2020)"},{"key":"1_CR31","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Barto, A.G., et\u00a0al.: Reinforcement learning: an introduction. MIT Press Cambridge (1998)","DOI":"10.1109\/TNN.1998.712192"},{"key":"1_CR32","doi-asserted-by":"crossref","unstructured":"Tang, C., Abbatematteo, B., Hu, J., Chandra, R., Mart\u00b4in-Mart\u00b4in, R., Stone, P.: Deep reinforcement learning for robotics: a survey of real-world successes. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol.\u00a039, pp. 28694\u201328698 (2025)","DOI":"10.1609\/aaai.v39i27.35095"},{"key":"1_CR33","unstructured":"Wang, Z., Hunt, J.J., Zhou, M.: Diffusion policies as an expressive policy class for offline reinforcement learning. arXiv preprint arXiv:2208.06193 (2022)"},{"key":"1_CR34","unstructured":"Yarats, D., Fergus, R., Lazaric, A., Pinto, L.: Mastering visual continuous control: Improved data-augmented reinforcement learning. arXiv preprint arXiv:2107.09645 (2021)"},{"key":"1_CR35","unstructured":"Yarats, D., Kostrikov, I., Fergus, R.: Image augmentation is all you need: regularizing deep reinforcement learning from pixels. In: International Conference on Learning Representations (2021)"},{"key":"1_CR36","unstructured":"Ying, C., Chen, H., Zhou, X., Hao, Z., Su, H., Zhu, J.: Exploratory diffusion policy for unsupervised reinforcement learning. arXiv preprint arXiv:2502.07279 (2025)"},{"key":"1_CR37","first-page":"14129","volume":"33","author":"T Yu","year":"2020","unstructured":"Yu, T., et al.: Mopo: model-based offline policy optimization. Adv. Neural. Inf. Process. Syst. 33, 14129\u201314142 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1_CR38","doi-asserted-by":"crossref","unstructured":"Yu, T., et\u00a0al.: Scaling robot learning with semantically imagined experience. arXiv preprint arXiv:2302.11550 (2023)","DOI":"10.15607\/RSS.2023.XIX.027"},{"key":"1_CR39","doi-asserted-by":"crossref","unstructured":"Zhao, Y., et al.: Multi-scenario combination based on multi-agent reinforcement learning to optimize the advertising recommendation system. In: 2024 5th International Conference on Artificial Intelligence and Electromechanical Automation (AIEA), pp. 190\u2013194. IEEE (2024)","DOI":"10.1109\/AIEA62095.2024.10692474"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5693-9_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T21:23:04Z","timestamp":1768944184000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5693-9_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819556922","9789819556939"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5693-9_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"21 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}