{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T14:24:26Z","timestamp":1743085466772,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":39,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819722525"},{"type":"electronic","value":"9789819722532"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-2253-2_22","type":"book-chapter","created":{"date-parts":[[2024,4,24]],"date-time":"2024-04-24T10:02:11Z","timestamp":1713952931000},"page":"273-285","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Communicative and\u00a0Cooperative Learning for\u00a0Multi-agent Indoor Navigation"],"prefix":"10.1007","author":[{"given":"Fengda","family":"Zhu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vincent CS","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rui","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,4,25]]},"reference":[{"unstructured":"Aiello, M., et al.: IPPO: A privacy-aware architecture for decentralized data-sharing (2020). arXiv:2001.06420","key":"22_CR1"},{"unstructured":"Anderson, P., et al.: On evaluation of embodied navigation agents (2018). arXiv:1807.06757","key":"22_CR2"},{"doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Vision-and-language navigation: interpreting visually-grounded navigation instructions in real environments. In: CVPR (2018)","key":"22_CR3","DOI":"10.1109\/CVPR.2018.00387"},{"unstructured":"Baker, B., et al.: Emergent tool use from multi-agent autocurricula. In: ICLR (2020)","key":"22_CR4"},{"doi-asserted-by":"crossref","unstructured":"Bard, N., et al.: The Hanabi challenge: a new frontier for AI research. Artif. Intell. 280, 103216 (2020)","key":"22_CR5","DOI":"10.1016\/j.artint.2019.103216"},{"unstructured":"Batra, D., et al.: ObjectNav revisited: On evaluation of embodied agents navigating to objects (2020). arXiv:2006.13171","key":"22_CR6"},{"unstructured":"Berner, C., et al.: Dota 2 with large scale deep reinforcement learning (2019). arXiv:1912.06680","key":"22_CR7"},{"doi-asserted-by":"crossref","unstructured":"Chang, A., et al.: Matterport3D: Learning from RGB-D data in indoor environments (2017). arXiv:1709.06158","key":"22_CR8","DOI":"10.1109\/3DV.2017.00081"},{"doi-asserted-by":"crossref","unstructured":"Cho, K., et al.: Learning phrase representations using RNN encoder-decoder for statistical machine translation (2014). arXiv:1406.1078","key":"22_CR9","DOI":"10.3115\/v1\/D14-1179"},{"doi-asserted-by":"crossref","unstructured":"Deitke, M., et\u00a0al.: RoboTHOR: an open simulation-to-real embodied AI platform. In: CVPR (2020)","key":"22_CR10","DOI":"10.1109\/CVPR42600.2020.00323"},{"doi-asserted-by":"crossref","unstructured":"Foerster, J., Farquhar, G., Afouras, T., Nardelli, N., Whiteson, S.: Counterfactual multi-agent policy gradients. In: AAAI (2018)","key":"22_CR11","DOI":"10.1609\/aaai.v32i1.11794"},{"unstructured":"Hu, S., Zhu, F., Chang, X., Liang, X.: UPDeT: universal multi-agent RL via policy decoupling with transformers. In: ICLR (2021)","key":"22_CR12"},{"unstructured":"Ikram, K., Mondrag\u00f3n, E., Alonso, E., Garcia-Ortiz, M.: HexaJungle: a marl simulator to study the emergence of language (2021)","key":"22_CR13"},{"doi-asserted-by":"crossref","unstructured":"Khan, M.J., Ahmed, S.H., Sukthankar, G.: Transformer-based value function decomposition for cooperative multi-agent reinforcement learning in starCraft. In: Proceedings of the AAAI Conference on Artificial Intelligence and Interactive Digital Entertainment. vol.\u00a018, pp. 113\u2013119 (2022)","key":"22_CR14","DOI":"10.1609\/aiide.v18i1.21954"},{"unstructured":"Kolve, E., Mottaghi, R., Gordon, D., Zhu, Y., Gupta, A., Farhadi, A.: AI2-THOR: An interactive 3D environment for visual AI (2017). arXiv:1712.05474","key":"22_CR15"},{"key":"22_CR16","first-page":"15230","volume":"34","author":"T Lin","year":"2021","unstructured":"Lin, T., Huh, J., Stauffer, C., Lim, S.N., Isola, P.: Learning to ground multi-agent communication with autoencoders. NeurIPS 34, 15230\u201315242 (2021)","journal-title":"NeurIPS"},{"doi-asserted-by":"crossref","unstructured":"Littman, M.L.: Markov games as a framework for multi-agent reinforcement learning. In: ICML (1994)","key":"22_CR17","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"unstructured":"Liu, S., Lever, G., Merel, J., Tunyasuvunakool, S., Heess, N., Graepel, T.: Emergent coordination through competition. In: ICLR (2019)","key":"22_CR18"},{"unstructured":"Mahajan, A., Rashid, T., Samvelyan, M., Whiteson, S.: MAVEN: Multi-agent variational exploration (2019). arXiv:1910.07483","key":"22_CR19"},{"unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning (2013). arXiv:1312.5602","key":"22_CR20"},{"doi-asserted-by":"crossref","unstructured":"Mordatch, I., Abbeel, P.: Emergence of grounded compositional language in multi-agent populations. In: AAAI (2017)","key":"22_CR21","DOI":"10.1609\/aaai.v32i1.11492"},{"unstructured":"Paquette, P., et al.: No press diplomacy: Modeling multi-agent gameplay (2019). arXiv:1909.02128","key":"22_CR22"},{"unstructured":"P\u00e9rez-Li\u00e9bana, D., et al.: The multi-agent reinforcement learning in malm\u00d6 (marl\u00d6) competition (2019). arXiv:1901.08129","key":"22_CR23"},{"unstructured":"Rashid, T., Samvelyan, M., Schroeder, C., Farquhar, G., Foerster, J., Whiteson, S.: QMIX: Monotonic value function factorisation for deep multi-agent reinforcement learning. In: ICML (2018)","key":"22_CR24"},{"unstructured":"Samvelyan, M., et al.: The StarCraft multi-agent challenge (2019). arXiv:1902.04043","key":"22_CR25"},{"unstructured":"Savva, M., Chang, A.X., Dosovitskiy, A., Funkhouser, T.A., Koltun, V.: MINOS: Multimodal indoor simulator for navigation in complex environments (2017). arXiv:1712.03931","key":"22_CR26"},{"doi-asserted-by":"crossref","unstructured":"Savva, M., et\u00a0al.: Habitat: a platform for embodied AI research. In: ICCV (2019)","key":"22_CR27","DOI":"10.1109\/ICCV.2019.00943"},{"unstructured":"Schulman, J., Moritz, P., Levine, S., Jordan, M., Abbeel, P.: High-dimensional continuous control using generalized advantage estimation. In: ICLR 2016 (2016)","key":"22_CR28"},{"unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms (2017). arXiv:1707.06347","key":"22_CR29"},{"unstructured":"Sutton, R.S., McAllester, D.A., Singh, S.P., Mansour, Y., et\u00a0al.: Policy gradient methods for reinforcement learning with function approximation. In: NeurIPS (1999)","key":"22_CR30"},{"unstructured":"Wang, T., Gupta, T., Mahajan, A., Peng, B., Whiteson, S., Zhang, C.: RODE: Learning roles to decompose multi-agent tasks (2020). arXiv:2010.01523","key":"22_CR31"},{"unstructured":"Wani, S., Patel, S., Jain, U., Chang, A.X., Savva, M.: MultiON: Benchmarking semantic map memory using multi-object navigation (2020). arXiv:2012.03912","key":"22_CR32"},{"unstructured":"de\u00a0Witt, C.S., et al.: Is independent learning all you need in the StarCraft multi-agent challenge? CoRR (2020)","key":"22_CR33"},{"doi-asserted-by":"crossref","unstructured":"Xia, F., et al.: Interactive Gibson benchmark: a benchmark for interactive navigation in cluttered environments. IEEE Robot. Autom. Lett. 5(2), 713\u2013720 (2020)","key":"22_CR34","DOI":"10.1109\/LRA.2020.2965078"},{"doi-asserted-by":"crossref","unstructured":"Xia, F., Zamir, A.R., He, Z., Sax, A., Malik, J., Savarese, S.: Gibson Env: real-world perception for embodied agents. In: CVPR (2018)","key":"22_CR35","DOI":"10.1109\/CVPR.2018.00945"},{"unstructured":"Yang, Y., et al.: Multi-agent determinantal Q-learning. In: ICML (2020)","key":"22_CR36"},{"unstructured":"Yu, C., Velu, A., Vinitsky, E., Wang, Y., Bayen, A.M., Wu, Y.: The surprising effectiveness of MAPPO in cooperative, multi-agent games (2021). arXiv:2103.01955","key":"22_CR37"},{"unstructured":"Yu, C., Velu, A., Vinitsky, E., Wang, Y., Bayen, A.M., Wu, Y.: The surprising effectiveness of MAPPO in cooperative, multi-agent games. CoRR (2021)","key":"22_CR38"},{"unstructured":"Zabounidis, R., Campbell, J., Stepputtis, S., Hughes, D., Sycara, K.P.: Concept learning for interpretable multi-agent reinforcement learning. In: Conference on Robot Learning, pp. 1828\u20131837. PMLR (2023)","key":"22_CR39"}],"container-title":["Lecture Notes in Computer Science","Advances in Knowledge Discovery and Data Mining"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-2253-2_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,24]],"date-time":"2024-04-24T23:14:15Z","timestamp":1714000455000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-2253-2_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819722525","9789819722532"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-2253-2_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"25 April 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PAKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific-Asia Conference on Knowledge Discovery and Data Mining","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taipei","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taiwan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 May 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 May 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pakdd2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/pakdd2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}