{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T08:23:40Z","timestamp":1758270220933,"version":"3.44.0"},"publisher-location":"Cham","reference-count":38,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030864859"},{"type":"electronic","value":"9783030864866"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-86486-6_7","type":"book-chapter","created":{"date-parts":[[2021,9,9]],"date-time":"2021-09-09T15:25:48Z","timestamp":1631201148000},"page":"104-121","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Learning to Build High-Fidelity and\u00a0Robust Environment Models"],"prefix":"10.1007","author":[{"given":"Weinan","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Zhengyu","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Jian","family":"Shen","sequence":"additional","affiliation":[]},{"given":"Minghuan","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Yimin","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Xing","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Ruiming","family":"Tang","sequence":"additional","affiliation":[]},{"given":"Zhenguo","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,9,10]]},"reference":[{"key":"7_CR1","unstructured":"Chua, K., Calandra, R., McAllister, R., Levine, S.: Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In: NeurIPS, pp. 4759\u20134770 (2018)"},{"key":"7_CR2","unstructured":"Goodfellow, I.J., et al.: Generative adversarial nets. In: NIPS (2014)"},{"key":"7_CR3","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: ICML, pp. 1861\u20131870. PMLR (2018)"},{"key":"7_CR4","unstructured":"Ho, J., Ermon, S.: Generative adversarial imitation learning. In: Advances in Neural Information Processing Systems, pp. 4565\u20134573 (2016)"},{"issue":"2","key":"7_CR5","first-page":"21","volume":"50","author":"A Hussein","year":"2017","unstructured":"Hussein, A., Gaber, M.M., Elyan, E., Jayne, C.: Imitation learning: a survey of learning methods. ACM Comput. Surv. (CSUR) 50(2), 21 (2017)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"7_CR6","unstructured":"Janner, M., Fu, J., Zhang, M., Levine, S.: When to trust your model: model-based policy optimization. In: NeurIPS, pp. 12519\u201312530 (2019)"},{"issue":"1","key":"7_CR7","first-page":"1334","volume":"17","author":"S Levine","year":"2016","unstructured":"Levine, S., Finn, C., Darrell, T., Abbeel, P.: End-to-end training of deep visuomotor policies. J. Mach. Learn. Res. 17(1), 1334\u20131373 (2016)","journal-title":"J. Mach. Learn. Res."},{"key":"7_CR8","unstructured":"Li, Y., Song, J., Ermon, S.: InfoGAIL: interpretable imitation learning from visual demonstrations. In: Advances in Neural Information Processing Systems, pp. 3812\u20133822 (2017)"},{"key":"7_CR9","unstructured":"Lin, Z., Thomas, G., Yang, G., Ma, T.: Model-based adversarial meta-reinforcement learning. In: Advances in Neural Information Processing Systems, vol. 33 (2020)"},{"key":"7_CR10","unstructured":"Luo, Y., Xu, H., Li, Y., Tian, Y., Darrell, T., Ma, T.: Algorithmic framework for model-based deep reinforcement learning with theoretical guarantees. In: ICLR (Poster) (2019)"},{"key":"7_CR11","unstructured":"Miyato, T., Kataoka, T., Koyama, M., Yoshida, Y.: Spectral normalization for generative adversarial networks. In: International Conference on Learning Representations (2018)"},{"issue":"7540","key":"7_CR12","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"7_CR13","unstructured":"Nachum, O., Chow, Y., Dai, B., Li, L.: DualDICE: behavior-agnostic estimation of discounted stationary distribution corrections. arXiv preprint arXiv:1906.04733 (2019)"},{"key":"7_CR14","doi-asserted-by":"crossref","unstructured":"Nagabandi, A., Kahn, G., Fearing, R.S., Levine, S.: Neural network dynamics for model-based deep reinforcement learning with model-free fine-tuning. In: ICRA (2018)","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"7_CR15","unstructured":"Ng, A.Y., Russell, S.: Algorithms for inverse reinforcement learning. In: ICML (2000)"},{"key":"7_CR16","unstructured":"Nilim, A., El Ghaoui, L.: Robustness in Markov decision problems with uncertain transition matrices. In: NIPS, pp. 839\u2013846. Citeseer (2003)"},{"key":"7_CR17","unstructured":"Paduraru, C.: Off-policy evaluation in Markov decision processes. Ph.D. thesis, Ph.D. dissertation. McGill University (2012)"},{"key":"7_CR18","unstructured":"Paine, T.L., et al.: Hyperparameter selection for offline reinforcement learning. arXiv preprint arXiv:2007.09055 (2020)"},{"key":"7_CR19","unstructured":"Pinto, L., Davidson, J., Sukthankar, R., Gupta, A.: Robust adversarial reinforcement learning. In: International Conference on Machine Learning, pp. 2817\u20132826. PMLR (2017)"},{"issue":"1","key":"7_CR20","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1162\/neco.1991.3.1.88","volume":"3","author":"DA Pomerleau","year":"1991","unstructured":"Pomerleau, D.A.: Efficient training of artificial neural networks for autonomous navigation. Neural Comput. 3(1), 88\u201397 (1991)","journal-title":"Neural Comput."},{"key":"7_CR21","unstructured":"Rajeswaran, A., Ghotra, S., Ravindran, B., Levine, S.: EPOpt: learning robust neural network policies using model ensembles. In: ICLR (2016)"},{"key":"7_CR22","unstructured":"Ross, S., Bagnell, D.: Efficient reductions for imitation learning. In: AISTATS, pp. 661\u2013668. JMLR Workshop and Conference Proceedings (2010)"},{"key":"7_CR23","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization. In: International Conference on Machine Learning, pp. 1889\u20131897 (2015)"},{"key":"7_CR24","doi-asserted-by":"crossref","unstructured":"Shang, W., Yu, Y., Li, Q., Qin, Z., Meng, Y., Ye, J.: Environment reconstruction with hidden confounders for reinforcement learning based recommendation. In: KDD (2019)","DOI":"10.1145\/3292500.3330933"},{"key":"7_CR25","doi-asserted-by":"crossref","unstructured":"Shi, J.C., Yu, Y., Da, Q., Chen, S.Y., Zeng, A.X.: Virtual-Taobao: virtualizing real-world online retail environment for reinforcement learning. In: AAAI, vol. 33, pp. 4902\u20134909 (2019)","DOI":"10.1609\/aaai.v33i01.33014902"},{"issue":"7587","key":"7_CR26","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"key":"7_CR27","doi-asserted-by":"crossref","unstructured":"Syed, U., Bowling, M., Schapire, R.E.: Apprenticeship learning using linear programming. In: ICML, pp. 1032\u20131039. ACM (2008)","DOI":"10.1145\/1390156.1390286"},{"key":"7_CR28","doi-asserted-by":"crossref","unstructured":"Tamar, A., Glassner, Y., Mannor, S.: Optimizing the CVaR via sampling. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 29 (2015)","DOI":"10.1609\/aaai.v29i1.9561"},{"key":"7_CR29","unstructured":"Thomas, P., Brunskill, E.: Data-efficient off-policy policy evaluation for reinforcement learning. In: International Conference on Machine Learning, pp. 2139\u20132148. PMLR (2016)"},{"key":"7_CR30","unstructured":"Wu, Y.H., Fan, T.H., Ramadge, P.J., Su, H.: Model imitation for model-based reinforcement learning. arXiv preprint arXiv:1909.11821 (2019)"},{"key":"7_CR31","unstructured":"Xu, T., Li, Z., Yu, Y.: Error bounds of imitating policies and environments. In: Advances in Neural Information Processing Systems, vol. 33 (2020)"},{"key":"7_CR32","doi-asserted-by":"crossref","unstructured":"Zhang, H., et al.: Learning to design games: Strategic environments in reinforcement learning. IJCAI (2018)","DOI":"10.24963\/ijcai.2018\/426"},{"key":"7_CR33","unstructured":"Zhang, H., Chen, H., Xiao, C., Li, B., Boning, D., Hsieh, C.J.: Robust deep reinforcement learning against adversarial perturbations on observations. arXiv:2003.08938 (2020)"},{"key":"7_CR34","doi-asserted-by":"crossref","unstructured":"Zhang, H., et al.: CityFlow: a multi-agent reinforcement learning environment for large scale city traffic scenario. In: The World Wide Web Conference, pp. 3620\u20133624 (2019)","DOI":"10.1145\/3308558.3314139"},{"key":"7_CR35","doi-asserted-by":"crossref","unstructured":"Zhao, X., Xia, L., Zhang, L., Ding, Z., Yin, D., Tang, J.: Deep reinforcement learning for page-wise recommendations. In: RecSys, pp. 95\u2013103 (2018)","DOI":"10.1145\/3240323.3240374"},{"key":"7_CR36","doi-asserted-by":"crossref","unstructured":"Zheng, G., Liu, H., Xu, K., Li, Z.: Learning to simulate vehicle trajectories from demonstrations. In: ICDE, pp. 1822\u20131825. IEEE (2020)","DOI":"10.1109\/ICDE48307.2020.00179"},{"key":"7_CR37","unstructured":"Zhou, M., et al.: Smarts: scalable multi-agent reinforcement learning training school for autonomous driving. In: Conference on Robot Learning (2020)"},{"key":"7_CR38","unstructured":"Ziebart, B.D., Maas, A.L., Bagnell, J.A., Dey, A.K.: Maximum entropy inverse reinforcement learning. In: AAAI, Chicago, IL, USA, vol. 8, pp. 1433\u20131438 (2008)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-86486-6_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T22:04:56Z","timestamp":1757369096000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-86486-6_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030864859","9783030864866"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-86486-6_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"10 September 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bilbao","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2021.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"869","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"210","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-9","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held online due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}