{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T03:45:18Z","timestamp":1743047118013,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":29,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819983902"},{"type":"electronic","value":"9789819983919"}],"license":[{"start":{"date-parts":[[2023,11,27]],"date-time":"2023-11-27T00:00:00Z","timestamp":1701043200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,27]],"date-time":"2023-11-27T00:00:00Z","timestamp":1701043200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-8391-9_25","type":"book-chapter","created":{"date-parts":[[2023,11,26]],"date-time":"2023-11-26T16:02:21Z","timestamp":1701014541000},"page":"310-324","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Cooperative Multi-Agent Reinforcement Learning with\u00a0Dynamic Target Localization: A Reward Sharing Approach"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2644-434X","authenticated-orcid":false,"given":"Helani","family":"Wickramaarachchi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6030-858X","authenticated-orcid":false,"given":"Michael","family":"Kirley","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0069-2281","authenticated-orcid":false,"given":"Nicholas","family":"Geard","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,11,27]]},"reference":[{"key":"25_CR1","unstructured":"Yang, J., Borovikov, I., Zha, H.: Hierarchical cooperative multi-agent reinforcement learning with skill discovery. In: Adaptive Agents and Multi-Agent Systems (2019)"},{"key":"25_CR2","unstructured":"Multi-agent Reinforcement Learning: Independent vs. Cooperative Agents. Morgan Kaufmann Publishers Inc., San Francisco (1997)"},{"key":"25_CR3","doi-asserted-by":"crossref","unstructured":"Hu, Z., Zhao, D.: Reinforcement learning for multi-agent patrol policy. In: 9th IEEE International Conference on Cognitive Informatics (ICCI\u201910), pp. 530\u2013535 (2010)","DOI":"10.1109\/COGINF.2010.5599681"},{"key":"25_CR4","unstructured":"Claus, C., Boutilier, C.: The dynamics of reinforcement learning in cooperative multiagent systems. In: AAAI\/IAAI (1998)"},{"key":"25_CR5","unstructured":"Rashid, T., Samvelyan, M., De Witt, C.S., Farquhar, G., Foerster, J., Whiteson, S.: QMIX: monotonic value function factorisation for deep multi-agent reinforcement learning (2018). ArXiv, abs\/1803.11485"},{"key":"25_CR6","doi-asserted-by":"crossref","unstructured":"Marzari, L., Pore, A., Dall\u2019Alba, D., Aragon-Camarasa, G., Farinelli, A., Fiorini, P.: Towards hierarchical task decomposition using deep reinforcement learning for pick and place subtasks. In: 2021 20th International Conference on Advanced Robotics (ICAR), pp. 640\u2013645 (2021)","DOI":"10.1109\/ICAR53236.2021.9659344"},{"key":"25_CR7","doi-asserted-by":"crossref","unstructured":"Chaimowicz, L., Campos, M.F., Kumar, V.: Dynamic role assignment for cooperative robots. In: Proceedings 2002 IEEE International Conference on Robotics and Automation (Cat. No.02CH37292), vol. 1, pp. 293\u2013298 (2002)","DOI":"10.1109\/ROBOT.2002.1013376"},{"key":"25_CR8","unstructured":"Foerster, J.N., Assael, Y., De Freitas, N., Whiteson, S.: Learning to communicate with deep multi-agent reinforcement learning (2016). ArXiv, abs\/1605.06676"},{"key":"25_CR9","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1007\/978-3-319-71682-4_5","volume-title":"Autonomous Agents and Multiagent Systems","author":"JK Gupta","year":"2017","unstructured":"Gupta, J.K., Egorov, M., Kochenderfer, M.: Cooperative multi-agent control using deep reinforcement learning. In: Sukthankar, G., Rodriguez-Aguilar, J.A. (eds.) AAMAS 2017. LNCS (LNAI), vol. 10642, pp. 66\u201383. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-71682-4_5"},{"key":"25_CR10","unstructured":"Mnih, V., et al.: Playing atari with deep reinforcement learning (2013). ArXiv, abs\/1312.5602"},{"key":"25_CR11","unstructured":"Ng, A.Y., Harada, D., Russell, S.: Policy invariance under reward transformations: theory and application to reward shaping. In: International Conference on Machine Learning (1999)"},{"key":"25_CR12","unstructured":"Wiewiora, E., Cottrell, G.W., Elkan, C.: Principled methods for advising reinforcement learning agents. In: Proceedings of the Twentieth International Conference on International Conference on Machine Learning, ICML\u201903, pp. 792\u2013799. AAAI Press (2003)"},{"key":"25_CR13","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1016\/j.neucom.2017.05.090","volume":"263","author":"P Mannion","year":"2017","unstructured":"Mannion, P., Devlin, S., Mason, K., Duggan, J., Howley, E.: Policy invariance under reward transformations for multi-objective reinforcement learning. Neurocomputing 263, 60\u201373 (2017)","journal-title":"Neurocomputing"},{"key":"25_CR14","doi-asserted-by":"publisher","unstructured":"Mannion, P., Devlin, S., Duggan, J., Howley, E.: Reward shaping for knowledge-based multi-objective multi-agent reinforcement learning. Knowl. Eng. Rev. 33, e23 (2018). https:\/\/doi.org\/10.1017\/S0269888918000292. Cambridge University Press","DOI":"10.1017\/S0269888918000292"},{"key":"25_CR15","doi-asserted-by":"publisher","unstructured":"Grze\u015b, M., Kudenko, D.: Multigrid reinforcement learning with reward shaping. In: Kurkov\u00e1, V., Neruda, R., Koutn\u00edk, J. (eds.) ICANN 2008. LNCS, vol. 5163, pp. 357\u2013366. Springer, Heidelberg (2008). https:\/\/doi.org\/10.1007\/978-3-540-87536-9_37","DOI":"10.1007\/978-3-540-87536-9_37"},{"key":"25_CR16","doi-asserted-by":"publisher","first-page":"36","DOI":"10.4018\/jats.2009040103","volume":"1","author":"M Grzes","year":"2009","unstructured":"Grzes, M., Kudenko, D.: Reinforcement learning with reward shaping and mixed resolution function approximation. Int. J. Agent Technol. Syst. 1, 36\u201354 (2009)","journal-title":"Int. J. Agent Technol. Syst."},{"key":"25_CR17","doi-asserted-by":"publisher","first-page":"256","DOI":"10.1016\/j.csl.2015.03.007","volume":"34","author":"E Ferreira","year":"2015","unstructured":"Ferreira, E., Lef\u00e8vre, F.: Reinforcement-learning based dialogue system for human-robot interactions with socially-inspired rewards. Comput. Speech Lang. 34, 256\u2013274 (2015)","journal-title":"Comput. Speech Lang."},{"key":"25_CR18","unstructured":"Devlin, S., Yliniemi, L., Kudenko, D., Tumer, K.: Potential-based difference rewards for multiagent reinforcement learning. In: Adaptive Agents and Multi-Agent Systems (2014)"},{"key":"25_CR19","unstructured":"Kim, D., et al.: Learning to schedule communication in multi-agent reinforcement learning (2019). ArXiv, abs\/1902.01554"},{"key":"25_CR20","unstructured":"Hostallero, D.E., Kim, D., Moon, S., Son, K., Kang, W.J., Yi, Y.: Inducing cooperation through reward reshaping based on peer evaluations in deep multi-agent reinforcement learning. In: AAMAS (2020)"},{"key":"25_CR21","unstructured":"Co-Reyes, J.D., Sanjeev, S., Berseth, G., Gupta, A., Levine, S.: Ecological reinforcement learning (2020). ArXiv, abs\/2006.12478"},{"key":"25_CR22","doi-asserted-by":"publisher","DOI":"10.1016\/j.aei.2022.101800","volume":"54","author":"B Huang","year":"2022","unstructured":"Huang, B., Jin, Y.: Reward shaping in multiagent reinforcement learning for self-organizing systems in assembly tasks. Adv. Eng. Inform. 54, 101800 (2022)","journal-title":"Adv. Eng. Inform."},{"key":"25_CR23","doi-asserted-by":"crossref","unstructured":"Konidaris, G.D., Barto, A.G.: Autonomous shaping: knowledge transfer in reinforcement learning. In: Proceedings of the 23rd International Conference on Machine Learning (2006)","DOI":"10.1145\/1143844.1143906"},{"key":"25_CR24","doi-asserted-by":"publisher","unstructured":"Rou\u010dek, T., et al.: DARPA subterranean challenge: multi-robotic exploration of underground environments. In: Mazal, J., Fagiolini, A., Vasik, P. (eds.) MESAS 2019. LNCS, vol. 11995, pp. 274\u2013290. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-43890-6_22","DOI":"10.1007\/978-3-030-43890-6_22"},{"key":"25_CR25","unstructured":"Stone, P., Veloso, M.: Multiagent systems: a survey from a machine learning perspective (2000)"},{"key":"25_CR26","doi-asserted-by":"crossref","unstructured":"Chen, X., Ghadirzadeh, A., Bj\u00f6rkman, M., Jensfelt, P.: Meta-learning for multi-objective reinforcement learning (2018)","DOI":"10.1109\/IROS40897.2019.8968092"},{"key":"25_CR27","doi-asserted-by":"crossref","unstructured":"Deep reinforcement learning framework for autonomous driving. Electron. Imaging 2017(19), 70\u201376 (2017)","DOI":"10.2352\/ISSN.2470-1173.2017.19.AVM-023"},{"key":"25_CR28","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms (2017). ArXiv, abs\/1707.06347"},{"key":"25_CR29","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization (2015). ArXiv, abs\/1502.05477"}],"container-title":["Lecture Notes in Computer Science","AI 2023: Advances in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-8391-9_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T10:43:04Z","timestamp":1730630584000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-8391-9_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,27]]},"ISBN":["9789819983902","9789819983919"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-8391-9_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023,11,27]]},"assertion":[{"value":"27 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australasian Joint Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brisbane, QLD","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 November 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ausai2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ajcai2023.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"213","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"23","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"59","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"11% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}