{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:52:38Z","timestamp":1740102758353,"version":"3.37.3"},"reference-count":17,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,4,14]],"date-time":"2024-04-14T00:00:00Z","timestamp":1713052800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,4,14]],"date-time":"2024-04-14T00:00:00Z","timestamp":1713052800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,4,14]]},"DOI":"10.1109\/icassp48485.2024.10446392","type":"proceedings-article","created":{"date-parts":[[2024,3,18]],"date-time":"2024-03-18T18:56:31Z","timestamp":1710788191000},"page":"6540-6544","source":"Crossref","is-referenced-by-count":0,"title":["CDA-MBPO:Corrected Data Aggregation for Model-Based Policy Optimization"],"prefix":"10.1109","author":[{"given":"Xin","family":"Du","sequence":"first","affiliation":[{"name":"Suzhou University of Science and Technology,School of Electronics and Information Engineering"}]},{"given":"Shan","family":"Zhong","sequence":"additional","affiliation":[{"name":"Changshu Institute of Technology,School of Computer Science and Engineering"}]},{"given":"Wenhao","family":"Ying","sequence":"additional","affiliation":[{"name":"Changshu Institute of Technology,School of Computer Science and Engineering"}]},{"given":"Yi","family":"Wang","sequence":"additional","affiliation":[{"name":"Suzhou University of Science and Technology,School of Electronics and Information Engineering"}]},{"given":"Shengrong","family":"Gong","sequence":"additional","affiliation":[{"name":"Suzhou University of Science and Technology,School of Electronics and Information Engineering"}]}],"member":"263","reference":[{"article-title":"Jump-start reinforcement learning","volume-title":"International Conference on Machine Learning(ICML)","author":"Uchendu","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-023-06004-9"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1126\/science.adf6591"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2022.11.051"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.buildenv.2023.110546"},{"article-title":"Understanding self-predictive learning for reinforcement learning","volume-title":"International Conference on Machine Learning(ICML)","author":"Tang","key":"ref6"},{"key":"ref7","article-title":"Ramborl: Robust adversarial model-based offline reinforcement learning","author":"Rigter","year":"2022","journal-title":"Advances in Neural Information Processing Systems(NIPS)"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"ref9","article-title":"When to trust your model: Model-based policy optimization","author":"Janner","year":"2019","journal-title":"Advances in Neural Information Processing Systems(NIPS)"},{"key":"ref10","article-title":"On effective scheduling of model-based reinforcement learning","author":"Lai","year":"2021","journal-title":"Advances in Neural Information Processing Systems(NIPS)"},{"article-title":"Bidirectional model-based policy optimization","volume-title":"International Conference on Machine Learning(ICML)","author":"Lai","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CDC45484.2021.9683134"},{"key":"ref13","article-title":"When to update your model: Constrained model-based reinforcement learning","author":"Ji","year":"2022","journal-title":"Advances in Neural Information Processing Systems(NIPS)"},{"key":"ref14","article-title":"Plan to predict: Learning an uncertainty-foreseeing model for model-based reinforcement learning","author":"Wu","year":"2022","journal-title":"Advances in Neural Information Processing Systems(NIPS)"},{"key":"ref15","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","author":"Chua","year":"2018","journal-title":"Advances in neural information processing systems(NIPS)"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v29i1.9590"},{"article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"International conference on machine learning(ICML)","author":"Haarnoja","key":"ref17"}],"event":{"name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2024,4,14]]},"location":"Seoul, Korea, Republic of","end":{"date-parts":[[2024,4,19]]}},"container-title":["ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10445798\/10445803\/10446392.pdf?arnumber=10446392","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,2]],"date-time":"2024-08-02T05:14:07Z","timestamp":1722575647000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10446392\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,14]]},"references-count":17,"URL":"https:\/\/doi.org\/10.1109\/icassp48485.2024.10446392","relation":{},"subject":[],"published":{"date-parts":[[2024,4,14]]}}}