{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T11:30:42Z","timestamp":1775129442123,"version":"3.50.1"},"reference-count":22,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,12]],"date-time":"2024-12-12T00:00:00Z","timestamp":1733961600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,12]],"date-time":"2024-12-12T00:00:00Z","timestamp":1733961600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001381","name":"National Research Foundation Singapore","doi-asserted-by":"publisher","award":["AISG2-GC-2023-007"],"award-info":[{"award-number":["AISG2-GC-2023-007"]}],"id":[{"id":"10.13039\/501100001381","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001348","name":"A*STAR","doi-asserted-by":"publisher","award":["M23M4a0067"],"award-info":[{"award-number":["M23M4a0067"]}],"id":[{"id":"10.13039\/501100001348","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,12]]},"DOI":"10.1109\/icarcv63323.2024.10821634","type":"proceedings-article","created":{"date-parts":[[2025,1,9]],"date-time":"2025-01-09T19:36:27Z","timestamp":1736451387000},"page":"801-806","source":"Crossref","is-referenced-by-count":1,"title":["Assessment of Multi-Agent Reinforcement Learning Strategies for Multi-Agent Negotiation"],"prefix":"10.1109","author":[{"given":"Hongyi","family":"Li","sequence":"first","affiliation":[{"name":"College of Design and Engineering, National University of Singapore,Faculty of Mechanical Engineering,Singapore"}]},{"given":"Ruihang","family":"Ji","sequence":"additional","affiliation":[{"name":"National University of Singapore,Department of Electrical and Computer Engineering,Singapore,Singapore,117576"}]},{"given":"Shuzhi Sam","family":"Ge","sequence":"additional","affiliation":[{"name":"National University of Singapore,Department of Electrical and Computer Engineering,Singapore,Singapore,117576"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-14435-6_7"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.11591\/ijece.v12i4.pp3517-3529"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2024.111753"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-60990-0_12"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.cogr.2023.07.004"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/icra57147.2024.10611301"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561359"},{"key":"ref8","article-title":"Isaac gym: High performance gpu-based physics simulation for robot learning","author":"Makoviychuk","year":"2021","journal-title":"arXiv preprint"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.aei.2005.06.002"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2024.3440005"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794102"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561315"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2007.913919"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.5555\/3016100.3016191"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-71682-4_5"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0172395"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TMECH.2020.2998076"},{"key":"ref19","author":"Nachum","year":"2019","journal-title":"Multi-agent manipulation via locomotion using hierarchical sim2real"},{"key":"ref20","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume-title":"Icml","volume":"99","author":"Ng","year":"1999"},{"key":"ref21","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"International conference on machine learning","author":"Mnih"},{"key":"ref22","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv preprint"}],"event":{"name":"2024 18th International Conference on Control, Automation, Robotics and Vision (ICARCV)","location":"Dubai, United Arab Emirates","start":{"date-parts":[[2024,12,12]]},"end":{"date-parts":[[2024,12,15]]}},"container-title":["2024 18th International Conference on Control, Automation, Robotics and Vision (ICARCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10821514\/10821497\/10821634.pdf?arnumber=10821634","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,14]],"date-time":"2025-01-14T19:39:34Z","timestamp":1736883574000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10821634\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,12]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/icarcv63323.2024.10821634","relation":{},"subject":[],"published":{"date-parts":[[2024,12,12]]}}}