{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T08:16:48Z","timestamp":1772093808626,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":26,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819549719","type":"print"},{"value":"9789819549726","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T00:00:00Z","timestamp":1764201600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T00:00:00Z","timestamp":1764201600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-4972-6_30","type":"book-chapter","created":{"date-parts":[[2025,11,26]],"date-time":"2025-11-26T08:08:38Z","timestamp":1764144518000},"page":"387-401","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Hybrid Multi-Agent Reinforcement Learning Framework for\u00a0Decentralised Search-And-Interact Tasks Under Partial Observability"],"prefix":"10.1007","author":[{"given":"Anh Viet","family":"Do","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eranda","family":"Galhenage","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aneta","family":"Neumann","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Frank","family":"Neumann","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anton V.","family":"Uzunov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Claudia","family":"Szabo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,11,27]]},"reference":[{"key":"30_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2021.107295","volume":"105","author":"RRO Al-Nima","year":"2021","unstructured":"Al-Nima, R.R.O., Han, T., Al-Sumaidaee, S.A.M., Chen, T., Woo, W.L.: Robustness and performance of deep reinforcement learning. Appl. Soft Comput. 105, 107295 (2021)","journal-title":"Appl. Soft Comput."},{"key":"30_CR2","unstructured":"Albrecht, S.V., Christianos, F., Sch\u00e4fer, L.: Multi-agent reinforcement learning: foundations and modern approaches. MIT Press (2024)"},{"issue":"11","key":"30_CR3","doi-asserted-by":"publisher","first-page":"4948","DOI":"10.3390\/app11114948","volume":"11","author":"L Canese","year":"2021","unstructured":"Canese, L., et al.: Multi-agent reinforcement learning: a review of challenges and applications. Appl. Sci. 11(11), 4948 (2021)","journal-title":"Appl. Sci."},{"issue":"2","key":"30_CR4","doi-asserted-by":"publisher","first-page":"729","DOI":"10.1109\/TWC.2019.2935201","volume":"19","author":"J Cui","year":"2019","unstructured":"Cui, J., Liu, Y., Nallanathan, A.: Multi-agent reinforcement learning-based resource allocation for UAV networks. IEEE Trans. Wireless Commun. 19(2), 729\u2013743 (2019)","journal-title":"IEEE Trans. Wireless Commun."},{"key":"30_CR5","doi-asserted-by":"crossref","unstructured":"Diaz-Vilor, C., Lozano, A., Jafarkhani, H.: A reinforcement learning approach for wildfire tracking with UAV swarms. IEEE Trans. Wireless Commun. (2025)","DOI":"10.1109\/TWC.2024.3524324"},{"key":"30_CR6","doi-asserted-by":"crossref","unstructured":"Guo, J., Chen, Y., Hao, Y., Yin, Z., Yu, Y., Li, S.: Towards comprehensive testing on the robustness of cooperative multi-agent reinforcement learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 115\u2013122 (2022)","DOI":"10.1109\/CVPRW56347.2022.00022"},{"key":"30_CR7","doi-asserted-by":"crossref","unstructured":"Haksar, R.N., Schwager, M.: Distributed deep reinforcement learning for fighting forest fires with a network of aerial robots. In: 2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 1067\u20131074. IEEE (2018)","DOI":"10.1109\/IROS.2018.8593539"},{"issue":"8","key":"30_CR8","doi-asserted-by":"publisher","first-page":"1768","DOI":"10.2514\/1.G004106","volume":"42","author":"KD Julian","year":"2019","unstructured":"Julian, K.D., Kochenderfer, M.J.: Distributed wildfire surveillance with autonomous aircraft using deep reinforcement learning. J. Guid. Control. Dyn. 42(8), 1768\u20131778 (2019)","journal-title":"J. Guid. Control. Dyn."},{"key":"30_CR9","unstructured":"Lee, K., Laskin, M., Srinivas, A., Abbeel, P.: SUNRISE: a simple unified framework for ensemble learning in deep reinforcement learning. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a0139, pp. 6131\u20136141. PMLR (2021)"},{"key":"30_CR10","unstructured":"Lowe, R., Wu, Y., Tamar, A., Harb, J., Abbeel, P., Mordatch, I.: Multi-agent actor-critic for mixed cooperative-competitive environments. Neural Inf. Process. Syst. (NIPS) (2017)"},{"key":"30_CR11","doi-asserted-by":"crossref","unstructured":"Mordatch, I., Abbeel, P.: Emergence of grounded compositional language in multi-agent populations. arXiv preprint arXiv:1703.04908 (2017)","DOI":"10.1609\/aaai.v32i1.11492"},{"issue":"9","key":"30_CR12","doi-asserted-by":"publisher","first-page":"3826","DOI":"10.1109\/TCYB.2020.2977374","volume":"50","author":"TT Nguyen","year":"2020","unstructured":"Nguyen, T.T., Nguyen, N.D., Nahavandi, S.: Deep reinforcement learning for multiagent systems: a review of challenges, solutions, and applications. IEEE Trans. Cybern. 50(9), 3826\u20133839 (2020)","journal-title":"IEEE Trans. Cybern."},{"issue":"1","key":"30_CR13","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1109\/JSAC.2020.3036962","volume":"39","author":"H Peng","year":"2020","unstructured":"Peng, H., Shen, X.: Multi-agent reinforcement learning based resource management in MEC-and UAV-assisted vehicular networks. IEEE J. Sel. Areas Commun. 39(1), 131\u2013141 (2020)","journal-title":"IEEE J. Sel. Areas Commun."},{"issue":"178","key":"30_CR14","first-page":"1","volume":"21","author":"T Rashid","year":"2020","unstructured":"Rashid, T., Samvelyan, M., de Witt, C.S., Farquhar, G., Foerster, J., Whiteson, S.: Monotonic value function factorisation for deep multi-agent reinforcement learning. J. Mach. Learn. Res. 21(178), 1\u201351 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"30_CR15","doi-asserted-by":"crossref","unstructured":"Rutherford, A., et al.: JaxMARL: multi-agent RL environments and algorithms in JAX. In: Proceedings of the 23rd International Conference on Autonomous Agents and Multiagent Systems, pp. 2444\u20132446. AAMAS \u201924, International Foundation for Autonomous Agents and Multiagent Systems, Richland, SC (2024)","DOI":"10.65109\/XJUC9898"},{"key":"30_CR16","doi-asserted-by":"publisher","unstructured":"Seraj, E., Silva, A., Gombolay, M.: Multi-UAV planning for cooperative wildfire coverage and tracking with quality-of-service guarantees. Autonomous Agents Multi-Agent Syst. 36(2) (2022). https:\/\/doi.org\/10.1007\/s10458-022-09566-6","DOI":"10.1007\/s10458-022-09566-6"},{"key":"30_CR17","doi-asserted-by":"crossref","unstructured":"Sunehag, P., et al.: Value-decomposition networks for cooperative multi-agent learning based on team reward. In: Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems, pp. 2085\u20132087. AAMAS \u201918, International Foundation for Autonomous Agents and Multiagent Systems, Richland, SC (2018)","DOI":"10.65109\/JSRC7365"},{"key":"30_CR18","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: an introduction. The MIT Press, 2nd edn. (2018)"},{"issue":"4","key":"30_CR19","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0172395","volume":"12","author":"A Tampuu","year":"2017","unstructured":"Tampuu, A., et al.: Multiagent cooperation and competition with deep reinforcement learning. PLoS ONE 12(4), e0172395 (2017). https:\/\/doi.org\/10.1371\/journal.pone.0172395","journal-title":"PLoS ONE"},{"key":"30_CR20","doi-asserted-by":"publisher","unstructured":"Tan, M.: Multi-agent reinforcement learning: independent vs. cooperative agents, pp. 330\u2013337. Elsevier (1993). https:\/\/doi.org\/10.1016\/b978-1-55860-307-3.50049-6","DOI":"10.1016\/b978-1-55860-307-3.50049-6"},{"key":"30_CR21","unstructured":"Viseras, A., Meissner, M., Marchal, J.: Wildfire front monitoring with multiple UAVS using deep Q-learning. IEEE Access (2021)"},{"key":"30_CR22","doi-asserted-by":"publisher","unstructured":"Wang, J., Wang, Y., Zhang, D., Yang, Y., Xiong, R.: Learning hierarchical behavior and motion planning for autonomous driving. In: 2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS). IEEE (2020). https:\/\/doi.org\/10.1109\/iros45743.2020.9341647","DOI":"10.1109\/iros45743.2020.9341647"},{"key":"30_CR23","doi-asserted-by":"crossref","unstructured":"Zhang, K., Yang, Z., Ba\u015far, T.: Multi-agent reinforcement learning: a selective overview of theories and algorithms. Handbook of Reinforcement Learning and Control, 321\u2013384 (2021)","DOI":"10.1007\/978-3-030-60990-0_12"},{"key":"30_CR24","first-page":"17271","volume":"33","author":"SQ Zhang","year":"2020","unstructured":"Zhang, S.Q., Zhang, Q., Lin, J.: Succinct and robust multi-agent communication with temporal message control. Adv. Neural. Inf. Process. Syst. 33, 17271\u201317282 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"10","key":"30_CR25","doi-asserted-by":"publisher","first-page":"11599","DOI":"10.1109\/TVT.2020.3014788","volume":"69","author":"Y Zhang","year":"2020","unstructured":"Zhang, Y., Mou, Z., Gao, F., Jiang, J., Ding, R., Han, Z.: UAV-enabled secure communications by multi-agent deep reinforcement learning. IEEE Trans. Veh. Technol. 69(10), 11599\u201311611 (2020)","journal-title":"IEEE Trans. Veh. Technol."},{"key":"30_CR26","unstructured":"Wang, Z., Schaul, T., Hessel, M., Hasselt, H., Lanctot, M., Freitas, N.: Dueling network architectures for deep reinforcement learning. In: Proceedings of The 33rd International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 48, pp. 1995\u20132003. PMLR (2016)"}],"container-title":["Lecture Notes in Computer Science","AI 2025: Advances in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-4972-6_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T07:23:22Z","timestamp":1772090602000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-4972-6_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,27]]},"ISBN":["9789819549719","9789819549726"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-4972-6_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,27]]},"assertion":[{"value":"27 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that\u00a0are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"AI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australasian Joint Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Canberra, ACT","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"38","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ausai2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ajcai2025.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}