{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T09:12:49Z","timestamp":1743066769271,"version":"3.40.3"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030893699"},{"type":"electronic","value":"9783030893705"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-89370-5_1","type":"book-chapter","created":{"date-parts":[[2021,11,1]],"date-time":"2021-11-01T01:02:59Z","timestamp":1635728579000},"page":"3-16","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Consistency Regularization for Ensemble Model Based Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Ruonan","family":"Jia","sequence":"first","affiliation":[]},{"given":"Qingming","family":"Li","sequence":"additional","affiliation":[]},{"given":"Wenzhen","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Junge","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Xiu","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,11,1]]},"reference":[{"key":"1_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Quigley, M., Ng, A.Y.: Using inaccurate models in reinforcement learning. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 1\u20138 (2006)","DOI":"10.1145\/1143844.1143845"},{"key":"1_CR2","doi-asserted-by":"crossref","unstructured":"Abdullah, A., Veltkamp, R.C., Wiering, M.A.: An ensemble of deep support vector machines for image categorization. In: 2009 International Conference of Soft Computing and Pattern Recognition, pp. 301\u2013306. IEEE (2009)","DOI":"10.1109\/SoCPaR.2009.67"},{"key":"1_CR3","doi-asserted-by":"crossref","unstructured":"Bagnell, J.A., Schneider, J.G.: Autonomous helicopter control using reinforcement learning policy search methods. In: Proceedings 2001 ICRA. IEEE International Conference on Robotics and Automation (Cat. No. 01CH37164), vol. 2, pp. 1615\u20131620. IEEE (2001)","DOI":"10.1109\/ROBOT.2001.932842"},{"key":"1_CR4","doi-asserted-by":"crossref","unstructured":"Botev, Z.I., Kroese, D.P., Rubinstein, R.Y., et al.: The cross-entropy method for optimization. In: Handbook of statistics, vol. 31, pp. 35\u201359. Elsevier (2013)","DOI":"10.1016\/B978-0-444-53859-8.00003-5"},{"key":"1_CR5","unstructured":"Bousquet, O., Chapelle, O., Hein, M.: Measure based regularization. In: Advances in Neural Information Processing Systems, pp. 1221\u20131228 (2004)"},{"key":"1_CR6","unstructured":"Buckman, J., Hafner, D., et al.: Sample-efficient reinforcement learning with stochastic ensemble value expansion. In: Proceedings of the 32nd International Conference on Neural Information Processing Systems, pp. 
8234\u20138244 (2018)"},{"key":"1_CR7","unstructured":"Chua, K., Calandra, R., et al.: Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In: Proceedings of the 32nd International Conference on Neural Information Processing Systems, pp. 4759\u20134770 (2018)"},{"key":"1_CR8","unstructured":"Clavera, I., Fu, Y., Abbeel, P.: Model-augmented actor-critic: backpropagating through paths. In: 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, 26\u201330 April 2020. OpenReview.net (2020)"},{"key":"1_CR9","unstructured":"Deisenroth, M.P., Rasmussen, C.E.: PILCO: a model-based and data-efficient approach to policy search. In: Proceedings of the 28th International Conference on Machine Learning, ICML 2011, Bellevue, Washington, USA, pp. 465\u2013472 (2011)"},{"key":"1_CR10","unstructured":"Feinberg, V., Wan, A., Stoica, I., Jordan, M.I., Gonzalez, J.E., Levine, S.: Model-based value estimation for efficient model-free reinforcement learning. CoRR abs\/1803.00101 (2018). arXiv:1803.00101"},{"key":"1_CR11","unstructured":"Graves, A.: Generating sequences with recurrent neural networks. CoRR abs\/1308.0850 (2013). arXiv:1308.0850"},{"key":"1_CR12","unstructured":"Ha, D., Schmidhuber, J.: Recurrent world models facilitate policy evolution. In: Proceedings of the 32nd International Conference on Neural Information Processing Systems, pp. 2455\u20132467 (2018)"},{"key":"1_CR13","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning, pp. 1861\u20131870. PMLR (2018)"},{"key":"1_CR14","unstructured":"Hafner, D., et al.: Learning latent dynamics for planning from pixels. In: International Conference on Machine Learning, pp. 2555\u20132565. PMLR (2019)"},{"key":"1_CR15","unstructured":"Heess, N., Wayne, G., Silver, D., et al.: Learning continuous control policies by stochastic value gradients. In: Proceedings of the 28th International Conference on Neural Information Processing Systems-Volume 2, pp. 2944\u20132952 (2015)"},{"key":"1_CR16","unstructured":"Janner, M., Fu, J., Zhang, M., Levine, S.: When to trust your model: model-based policy optimization. In: Advances in Neural Information Processing Systems 32, pp. 12498\u201312509 (2019)"},{"key":"1_CR17","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. In: 2nd International Conference on Learning Representations, ICLR 2014, Banff, AB, Canada, 14\u201316 April 2014, Conference Track Proceedings (2014)"},{"key":"1_CR18","unstructured":"Konda, V.R., Tsitsiklis, J.N.: Actor-critic algorithms. In: Advances in Neural Information Processing Systems, pp. 1008\u20131014. Citeseer (2000)"},{"key":"1_CR19","unstructured":"Kurutach, T., Clavera, I., Duan, Y., Tamar, A., Abbeel, P.: Model-ensemble trust-region policy optimization. In: International Conference on Learning Representations (2018)"},{"key":"1_CR20","unstructured":"Levine, S., Abbeel, P.: Learning neural network policies with guided policy search under unknown dynamics. In: Advances in Neural Information Processing Systems 27: Annual Conference on Neural Information Processing Systems 2014, 8\u201313 December 2014, Montreal, Quebec, Canada, pp. 
1071\u20131079 (2014)"},{"key":"1_CR21","first-page":"39:1","volume":"17","author":"S Levine","year":"2016","unstructured":"Levine, S., Finn, C., Darrell, T., Abbeel, P.: End-to-end training of deep visuomotor policies. J. Mach. Learn. Res. 17, 39:1-39:40 (2016)","journal-title":"J. Mach. Learn. Res."},{"key":"1_CR22","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. In: ICLR (Poster) (2016)"},{"key":"1_CR23","unstructured":"Lyu, J., Ma, X., Yan, J., Li, X.: Efficient continuous control with double actors and regularized critics. arXiv preprint arXiv:2106.03050 (2021)"},{"issue":"7540","key":"1_CR24","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"1_CR25","doi-asserted-by":"crossref","unstructured":"Nagabandi, A., Kahn, G., Fearing, R.S., Levine, S.: Neural network dynamics for model-based deep reinforcement learning with model-free fine-tuning. In: 2018 IEEE International Conference on Robotics and Automation (ICRA), pp. 7559\u20137566. IEEE (2018)","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"1_CR26","unstructured":"Richards, A.G.: Robust constrained model predictive control. Ph.D. thesis, Massachusetts Institute of Technology (2005)"},{"issue":"4","key":"1_CR27","first-page":"e1249","volume":"8","author":"O Sagi","year":"2018","unstructured":"Sagi, O., Rokach, L.: Ensemble learning: a survey. Wiley Interdisc. Rev.: Data Min. Knowl. Discov. 8(4), e1249 (2018)","journal-title":"Wiley Interdisc. Rev.: Data Min. Knowl. Discov."},{"key":"1_CR28","unstructured":"Schrittwieser, J.,et al.: Mastering atari, go, chess and shogi by planning with a learned model. CoRR abs\/1911.08265 (2019)"},{"key":"1_CR29","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M.I., Moritz, P.: Trust region policy optimization. In: Proceedings of the 32nd International Conference on Machine Learning, ICML 2015, Lille, France, 6\u201311 July 2015, JMLR Workshop and Conference Proceedings, vol. 37, pp. 1889\u20131897. JMLR.org (2015)"},{"key":"1_CR30","unstructured":"Schulman, J., Moritz, P., Levine, S., Jordan, M.I., Abbeel, P.: High-dimensional continuous control using generalized advantage estimation. In: 4th International Conference on Learning Representations (2016)"},{"issue":"7587","key":"1_CR31","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"issue":"7676","key":"1_CR32","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver, D., et al.: Mastering the game of go without human knowledge. Nature 550(7676), 354\u2013359 (2017)","journal-title":"Nature"},{"key":"1_CR33","volume-title":"Reinforcement Learning: An Introduction. Adaptive Computation and Machine Learning","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. Adaptive Computation and Machine Learning. 
MIT Press, Cambridge (1998)"},{"key":"1_CR34","doi-asserted-by":"crossref","unstructured":"Todorov, E., Erez, T., Tassa, Y.: Mujoco: a physics engine for model-based control. In: 2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems, IROS 2012, Vilamoura, Algarve, Portugal, pp. 5026\u20135033. IEEE (2012)","DOI":"10.1109\/IROS.2012.6386109"},{"key":"1_CR35","unstructured":"Wang, T., Bao, X., Clavera, I., Hoang, J., et al.: Benchmarking model-based reinforcement learning. CoRR abs\/1907.02057 (2019)"}],"container-title":["Lecture Notes in Computer Science","PRICAI 2021: Trends in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-89370-5_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T04:54:18Z","timestamp":1726030458000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-89370-5_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030893699","9783030893705"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-89370-5_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"1 November 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRICAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific Rim International Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hanoi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 November 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 November 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pricai2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.pricai.org\/2021","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review 
Information (provided by the conference organizers)"}},{"value":"382","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"93","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}
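The record above is a Crossref REST API work message for DOI 10.1007/978-3-030-89370-5_1. As a minimal sketch of how such a record can be retrieved and read programmatically (assuming Python with the third-party requests package; all field names are taken from the JSON above):

import requests

# Crossref serves this work record as JSON at api.crossref.org/works/{DOI}.
DOI = "10.1007/978-3-030-89370-5_1"
resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
resp.raise_for_status()
work = resp.json()["message"]  # corresponds to the "message" object above

print(work["title"][0])          # chapter title
print(work["container-title"])   # series and proceedings titles
print(work["references-count"])  # 35
for ref in work["reference"][:3]:
    # each entry carries an "unstructured" citation string and, when resolved, a "DOI"
    print(ref["key"], ref.get("DOI", "no DOI"))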