{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T11:18:29Z","timestamp":1780053509299,"version":"3.54.0"},"reference-count":93,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"NSF RTML","award":["CCF-1937403"],"award-info":[{"award-number":["CCF-1937403"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. 
Lett."],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1109\/lra.2025.3562371","type":"journal-article","created":{"date-parts":[[2025,4,18]],"date-time":"2025-04-18T13:38:37Z","timestamp":1744983517000},"page":"5681-5688","source":"Crossref","is-referenced-by-count":29,"title":["LLM-Based Multi-Agent Decision-Making: Challenges and Future Directions"],"prefix":"10.1109","volume":"10","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7524-9044","authenticated-orcid":false,"given":"Chuanneng","family":"Sun","sequence":"first","affiliation":[{"name":"Department of Electrical and Computer Engineering, Rutgers University\u2013New Brunswick, New Brunswick, NJ, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-9900-0530","authenticated-orcid":false,"given":"Songjun","family":"Huang","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, Rutgers University\u2013New Brunswick, New Brunswick, NJ, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5365-509X","authenticated-orcid":false,"given":"Dario","family":"Pompili","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, Rutgers University\u2013New Brunswick, New Brunswick, NJ, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161019"},{"key":"ref2","article-title":"Safe, multi-agent, reinforcement learning for autonomous 
driving","author":"Shalev-Shwartz","year":"2016"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/MASS50613.2020.00030"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/DCOSS-IoT61029.2024.00060"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/DCOSS-IoT61029.2024.00014"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/S0166-4115(97)80111-2"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i8.26129"},{"key":"ref10","article-title":"Language as an abstraction for hierarchical deep reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Jiang","year":"2019"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17326"},{"key":"ref12","article-title":"ChatGPT: Optimizing language models for dialogue","year":"2023"},{"key":"ref13","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023"},{"issue":"240","key":"ref14","first-page":"1","article-title":"PaLM: Scaling language modeling with pathways","volume":"24","author":"Chowdhery","year":"2023","journal-title":"J. Mach. Learn. Res."},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00543"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00515"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICHI61247.2024.00057"},{"key":"ref18","article-title":"Emergence of language with multi-agent games: Learning to communicate with sequences of symbols","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Havrylov","year":"2017"},{"key":"ref19","first-page":"8634","article-title":"Reflexion: Language agents with verbal reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","volume":"36","author":"Shinn","year":"2024"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2024\/890"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.2977374"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-019-09421-1"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-09996-w"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/880"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2024.3497992"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28929-8"},{"issue":"1","key":"ref27","first-page":"7234","article-title":"Monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"21","author":"Rashid","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref28","first-page":"5887","article-title":"QTRAN: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Son","year":"2019"},{"key":"ref29","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Lowe","year":"2017"},{"key":"ref30","first-page":"24611","article-title":"The surprising effectiveness of PPO in cooperative multi-agent games","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Yu","year":"2022"},{"key":"ref31","first-page":"2085","article-title":"Value-decomposition networks for cooperative multi-agent learning based on team reward","volume-title":"Proc. 17th Int. Conf. Auton. Agents MultiAgent Syst.","author":"Sunehag","year":"2018"},{"key":"ref32","first-page":"10199","article-title":"Weighted QMIX: Expanding monotonic value function factorisation for deep multi-agent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","volume":"33","author":"Rashid","year":"2020"},{"key":"ref33","article-title":"QPLEX: Duplex dueling multi-agent Q-learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang","year":"2021"},{"key":"ref34","article-title":"Reducing overestimation bias in multi-agent domains using double centralized critics","author":"Ackermann","year":"2019"},{"key":"ref35","article-title":"DOP: Off-policy multi-agent decomposed policy gradients","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang","year":"2020"},{"key":"ref36","article-title":"Learning to communicate with deep multi-agent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Foerster","year":"2016"},{"key":"ref37","first-page":"1538","article-title":"TarMAC: Targeted multi-agent communication","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Das","year":"2019"},{"key":"ref38","article-title":"Learning multiagent communication with backpropagation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Sukhbaatar","year":"2016"},{"key":"ref39","article-title":"VAIN: Attentional multi-agent predictive modeling","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Hoshen","year":"2017"},{"key":"ref40","article-title":"Learning attentional communication for multi-agent cooperation","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","volume":"31","author":"Jiang","year":"2018"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11492"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413716"},{"key":"ref43","article-title":"Networked multi-agent reinforcement learning with emergent communication","author":"Gupta","year":"2020"},{"key":"ref44","article-title":"Emergent multi-agent communication in the deep learning era","author":"Lazaridou","year":"2020"},{"key":"ref45","article-title":"React: Synergizing reasoning and acting in language models","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Yao","year":"2023"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-naacl.264"},{"key":"ref47","article-title":"Retrieval-augmented hierarchical in-context reinforcement learning and hindsight modular reflections for task planning with LLMs","author":"Sun","year":"2024"},{"key":"ref48","first-page":"1100","article-title":"REFINER: Reasoning feedback on intermediate representations","volume-title":"Proc. 18th Conf. Eur. Chapter Assoc. Comput. Linguistics 2024","author":"Paul","year":"2023"},{"key":"ref49","article-title":"A simple framework for intrinsic reward-shaping for RL using LLM feedback","author":"Zhang","year":"2024"},{"key":"ref50","article-title":"Retroformer: Retrospective large language agents with policy gradient optimization","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Yao","year":"2024"},{"key":"ref51","article-title":"REX: Rapid exploration and exploitation for AI agents","volume":"abs\/2307.08962","author":"Murthy","year":"2023","journal-title":"CoRR"},{"key":"ref52","first-page":"8469","article-title":"Palm-e: An embodied multimodal language model","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Driess","year":"2023"},{"key":"ref53","first-page":"9118","article-title":"Language models as zero-shot planners: Extracting actionable knowledge for embodied agents","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Huang","year":"2022"},{"key":"ref54","first-page":"287","article-title":"Do as I can, not as I say: Grounding language in robotic affordances","volume-title":"Proc. Conf. Robot Learn.","author":"Brohan","year":"2023"},{"key":"ref55","article-title":"Dynamic LLM-agent network: An LLM-agent collaboration framework with agent team optimization","volume":"abs\/2310.02170","author":"Liu","year":"2023","journal-title":"CoRR"},{"key":"ref56","article-title":"Multi-agent consensus seeking via large language models","author":"Chen","year":"2023"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.13"},{"key":"ref58","article-title":"Building cooperative embodied agents modularly with large language models","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Zhang","year":"2024"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10802322"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610855"},{"key":"ref61","article-title":"Co-NavGPT: Multi-robot cooperative visual semantic navigation using large language models","author":"Yu","year":"2023"},{"key":"ref62","article-title":"Embodied LLM agents learn to cooperate in organized teams","volume-title":"Proc. Lang. Gamification - NeurIPS 2024 Workshop","author":"Guo","year":"2024"},{"key":"ref63","article-title":"MetaGPT: Meta programming for multi-agent collaborative framework","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Hong","year":"2023"},{"key":"ref64","first-page":"87908","article-title":"Language grounded multi-agent reinforcement learning with human-interpretable communication","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","volume":"37","author":"Li","year":"2025"},{"key":"ref65","article-title":"How far are we on the decision-making of LLMs? Evaluating LLMs\u2019 gaming ability in multi-agent environments","volume":"abs\/2403.11807","author":"Huang","year":"2024","journal-title":"CoRR"},{"key":"ref66","article-title":"A comparison of deepseek and other LLMs","author":"Gao","year":"2025"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-024-00976-7"},{"key":"ref68","article-title":"AutoGen: Enabling next-gen LLM applications via multi-agent conversation","volume-title":"Proc. ICLR 2024 Workshop Large Lang. Model Agents","author":"Wu","year":"2024"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606763"},{"key":"ref70","first-page":"51991","article-title":"Camel: Communicative agents for mind exploration of large language model society","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Li","year":"2024"},{"key":"ref71","article-title":"GraphSnapShot: Graph machine learning acceleration with fast storage and retrieval","volume":"abs\/2406.17918","author":"Liu","year":"2024","journal-title":"CoRR"},{"key":"ref72","article-title":"Distance recomputator and topology reconstructor for graph neural networks","author":"Liu","year":"2024"},{"key":"ref73","article-title":"Agent-agnostic human-in-the-loop reinforcement learning","author":"Abel","year":"2017"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/CAC.2017.8243575"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3289315"},{"key":"ref76","article-title":"Deep reinforcement learning from human preferences","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"30","author":"Christiano","year":"2017"},{"key":"ref77","article-title":"LoRA: Low-rank adaptation of large language models","volume-title":"Proc. Int. Conf. Learn. 
Representations","author":"Hu","year":"2022"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i14.29540"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i14.29541"},{"key":"ref80","article-title":"Parameter-efficient fine-tuning for pre-trained vision models: A survey","author":"Xin","year":"2024"},{"key":"ref81","article-title":"LLMEasyQuant - an easy to use toolkit for LLM quantization","author":"Liu","year":"2024"},{"key":"ref82","article-title":"In-context learning distillation: Transferring few-shot learning ability of pre-trained language models","author":"Huang","year":"2022"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.372"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.195"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1031"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1202"},{"key":"ref87","article-title":"Phi-3 technical report: A highly capable language model locally on your phone","volume":"abs\/2404.14219","author":"Abdin","year":"2024","journal-title":"CoRR"},{"key":"ref88","first-page":"2409","article-title":"Multi-agent deep reinforcement learning for high-frequency multi-market making","volume-title":"Proc. Int. Conf. Auton. Agents Multiagent Syst.","author":"Kumar","year":"2023"},{"key":"ref89","article-title":"Reinforcement learning in high-frequency market making","volume":"abs\/2407.21025","author":"Zheng","year":"2024","journal-title":"CoRR"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOMWKSHPS50562.2020.9162984"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/secon64284.2024.10934854"},{"key":"ref92","first-page":"671","article-title":"Learning decentralized controllers for robot swarms with graph neural networks","volume-title":"Proc. Conf. 
Robot Learn.","author":"Tolstaya","year":"2020"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/IOTM.001.2300102"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/7083369\/10969146\/10970024-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7083369\/10969146\/10970024.pdf?arnumber=10970024","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,19]],"date-time":"2025-11-19T18:47:40Z","timestamp":1763578060000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10970024\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6]]},"references-count":93,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/lra.2025.3562371","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6]]}}}