{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:36:13Z","timestamp":1767339373671,"version":"3.40.3"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031434204"},{"type":"electronic","value":"9783031434211"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-43421-1_32","type":"book-chapter","created":{"date-parts":[[2023,9,17]],"date-time":"2023-09-17T20:37:24Z","timestamp":1694983044000},"page":"540-555","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Filtered Observations for\u00a0Model-Based Multi-agent Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Linghui","family":"Meng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuantang","family":"Xiong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yifan","family":"Zang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xi","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guoqi","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dengpeng","family":"Xing","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,9,18]]},"reference":[{"key":"32_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2019.103216","volume":"280","author":"N Bard","year":"2020","unstructured":"Bard, N.: The hanabi challenge: a new frontier for AI research. Artif. Intell. 280, 103216 (2020)","journal-title":"Artif. Intell."},{"key":"32_CR2","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. Adv. Neural. Inf. Process. Syst. 34, 8780\u20138794 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"doi-asserted-by":"crossref","unstructured":"Fang, K., Yin, P., Nair, A., Levine, S.: Planning to practice: efficient online fine-tuning by composing goals in latent space. arXiv preprint arXiv:2205.08129 (2022)","key":"32_CR3","DOI":"10.1109\/IROS47612.2022.9981999"},{"unstructured":"Hafner, D., Lillicrap, T., Ba, J., Norouzi, M.: Dream to control: learning behaviors by latent imagination. arXiv preprint arXiv:1912.01603 (2019)","key":"32_CR4"},{"unstructured":"Hafner, D., et al.: Learning latent dynamics for planning from pixels. arXiv preprint arXiv:1811.04551 (2018)","key":"32_CR5"},{"unstructured":"Hafner, D., Lillicrap, T., Norouzi, M., Ba, J.: Mastering Atari with discrete world models. arXiv preprint arXiv:2010.02193 (2020)","key":"32_CR6"},{"key":"32_CR7","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"unstructured":"Hu, H., Foerster, J.N.: Simplified action decoder for deep multi-agent reinforcement learning. arXiv preprint arXiv:1912.02288 (2019)","key":"32_CR8"},{"unstructured":"Janner, M., Du, Y., Tenenbaum, J.B., Levine, S.: Planning with diffusion for flexible behavior synthesis. arXiv preprint arXiv:2205.09991 (2022)","key":"32_CR9"},{"unstructured":"Kuba, J.G., et al.: Trust region policy optimisation in multi-agent reinforcement learning. arXiv preprint arXiv:2109.11251 (2021)","key":"32_CR10"},{"unstructured":"Levine, S.: Reinforcement learning and control as probabilistic inference: Tutorial and review. arXiv preprint arXiv:1805.00909 (2018)","key":"32_CR11"},{"doi-asserted-by":"crossref","unstructured":"Mao, H., et al.: Neighborhood cognition consistent multi-agent reinforcement learning. In: AAAI (2020)","key":"32_CR12","DOI":"10.1609\/aaai.v34i05.6212"},{"key":"32_CR13","first-page":"24379","volume":"34","author":"R Mendonca","year":"2021","unstructured":"Mendonca, R., Rybkin, O., Daniilidis, K., Hafner, D., Pathak, D.: Discovering and achieving goals via world models. Adv. Neural. Inf. Process. Syst. 34, 24379\u201324391 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"32_CR14","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28929-8","volume-title":"A Concise Introduction to Decentralized POMDPs","author":"FA Oliehoek","year":"2016","unstructured":"Oliehoek, F.A., Amato, C.: A Concise Introduction to Decentralized POMDPs. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-28929-8"},{"unstructured":"OroojlooyJadid, A., Hajinezhad, D.: A review of cooperative multi-agent deep reinforcement learning. arXiv preprint arXiv:1908.03963 (2019)","key":"32_CR15"},{"unstructured":"Paszke, A., et al.: Pytorch: an imperative style, high-performance deep learning library. In: Advances in Neural Information Processing Systems, vol. 32 (2019)","key":"32_CR16"},{"unstructured":"Pasztor, B., Bogunovic, I., Krause, A.: Efficient model-based multi-agent mean-field reinforcement learning. arXiv preprint arXiv:2107.04050 (2021)","key":"32_CR17"},{"unstructured":"Rashid, T., Samvelyan, M., Schroeder, C., Farquhar, G., Foerster, J., Whiteson, S.: Qmix: Monotonic value function factorisation for deep multi-agent reinforcement learning. In: International Conference on Machine Learning, pp. 4295\u20134304. PMLR (2018)","key":"32_CR18"},{"unstructured":"Samvelyan, M., et al.: The starcraft multi-agent challenge. arXiv preprint arXiv:1902.04043 (2019)","key":"32_CR19"},{"unstructured":"Sunehag, P., et al.: Value-decomposition networks for cooperative multi-agent learning. arXiv preprint arXiv:1706.05296 (2017)","key":"32_CR20"},{"unstructured":"Wang, T., Dong, H., Lesser, V.R., Zhang, C.: Roma: multi-agent reinforcement learning with emergent roles. arXiv:abs\/2003.08039 (2020)","key":"32_CR21"},{"unstructured":"Wang, T., Gupta, T., Mahajan, A., Peng, B., Whiteson, S., Zhang, C.: Rode: learning roles to decompose multi-agent tasks. arXiv:abs\/2010.01523 (2021)","key":"32_CR22"},{"unstructured":"Wang, T., Du, S.S., Torralba, A., Isola, P., Zhang, A., Tian, Y.: Denoised MDPs: learning world models better than the world itself. arXiv preprint arXiv:2206.15477 (2022)","key":"32_CR23"},{"unstructured":"Wang, X., Zhang, Z., Zhang, W.: Model-based multi-agent reinforcement learning: Recent progress and prospects. arXiv preprint arXiv:2203.10603 (2022)","key":"32_CR24"},{"unstructured":"Wen, M., et al.: Multi-agent reinforcement learning is a sequence modeling problem. arXiv preprint arXiv:2205.14953 (2022)","key":"32_CR25"},{"unstructured":"Wu, P., Escontrela, A., Hafner, D., Goldberg, K., Abbeel, P.: Daydreamer: world models for physical robot learning. arXiv preprint arXiv:2206.14176 (2022)","key":"32_CR26"},{"unstructured":"Xu, Y., et al.: Learning general world models in a handful of reward-free deployments. arXiv preprint arXiv:2210.12719 (2022)","key":"32_CR27"},{"doi-asserted-by":"crossref","unstructured":"Yarats, D., Zhang, A., Kostrikov, I., Amos, B., Pineau, J., Fergus, R.: Improving sample efficiency in model-free reinforcement learning from images. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, pp. 10674\u201310681 (2021)","key":"32_CR28","DOI":"10.1609\/aaai.v35i12.17276"},{"issue":"4","key":"32_CR29","doi-asserted-by":"publisher","first-page":"3691","DOI":"10.1007\/s10489-021-02554-5","volume":"52","author":"Z Ye","year":"2022","unstructured":"Ye, Z., Chen, Y., Jiang, X., Song, G., Yang, B., Fan, S.: Improving sample efficiency in multi-agent actor-critic methods. Appl. Intell. 52(4), 3691\u20133704 (2022)","journal-title":"Appl. Intell."},{"unstructured":"Yu, C., Velu, A., Vinitsky, E., Wang, Y., Bayen, A., Wu, Y.: The surprising effectiveness of PPO in cooperative, multi-agent games. arXiv preprint arXiv:2103.01955 (2021)","key":"32_CR30"},{"doi-asserted-by":"crossref","unstructured":"Zhang, K., Yang, Z., Ba\u015far, T.: Multi-agent reinforcement learning: a selective overview of theories and algorithms. In: Handbook of Reinforcement Learning and Control, pp. 321\u2013384 (2021)","key":"32_CR31","DOI":"10.1007\/978-3-030-60990-0_12"},{"unstructured":"Zhou, M., et al.: Smarts: scalable multi-agent reinforcement learning training school for autonomous driving. arXiv preprint arXiv:2010.09776 (2020)","key":"32_CR32"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases: Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-43421-1_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,17]],"date-time":"2023-09-17T20:47:02Z","timestamp":1694983622000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-43421-1_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031434204","9783031434211"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-43421-1_32","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"18 September 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2023.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"829","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"196","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.63","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Applied Data Science Track: 239 submissions, 58 accepted papers; Demo Track: 31 submissions, 16 accepted papers.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}