{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T16:38:48Z","timestamp":1780418328276,"version":"3.54.1"},"reference-count":42,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11128370","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"15021-15028","source":"Crossref","is-referenced-by-count":1,"title":["Multi-Agent Inverse Q-Learning from Demonstrations"],"prefix":"10.1109","author":[{"given":"Nathaniel","family":"Haynam","sequence":"first","affiliation":[{"name":"UC Berkeley,Electrical Engineering and Computer Sciences"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Adam","family":"Khoja","sequence":"additional","affiliation":[{"name":"UC Berkeley,Electrical Engineering and Computer Sciences"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dhruv","family":"Kumar","sequence":"additional","affiliation":[{"name":"UC Berkeley,Electrical Engineering and Computer Sciences"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Vivek","family":"Myers","sequence":"additional","affiliation":[{"name":"UC Berkeley,Electrical Engineering and Computer Sciences"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Erdem","family":"Biyik","sequence":"additional","affiliation":[{"name":"UC Berkeley,Electrical Engineering and 
Computer Sciences"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","author":"Allegra","year":"2020","journal-title":"Inverse Reinforcement Learning for Autonomous Driving"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2020.xvi.039"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.enbuild.2023.112941"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096237"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557105"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-02044-5_3"},{"key":"ref7","first-page":"7194","article-title":"Multi-Agent Adversarial Inverse Reinforcement Learning","volume-title":"International Conference on Machine Learning. PMLR","author":"Yu"},{"key":"ref8","article-title":"Multi-Agent Actor-Critic for Mixed Cooperative-Competitive Environments","volume-title":"Neural Information Processing Systems","volume":"30","author":"Lowe","year":"2017"},{"key":"ref9","author":"Xue","year":"2022","journal-title":"Multi-Agent Dynamic Algorithm Configuration"},{"key":"ref10","author":"Carroll","year":"2020","journal-title":"On the Utility of Learning About Humans for Human-AI Coordination"},{"key":"ref11","volume-title":"An Environment for Autonomous Driving Decision-Making","author":"Leurent","year":"2018"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143936"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref14","first-page":"2","article-title":"Algorithms for Inverse Reinforcement Learning","volume":"1","author":"Ng","year":"2000","journal-title":"ICML"},{"key":"ref15","first-page":"2586","article-title":"Bayesian Inverse Reinforcement Learning","volume":"7","author":"Ramachandran","year":"2007","journal-title":"IJCAI"},{"key":"ref16","article-title":"Generative Adversarial Imitation Learning","volume-title":"Neural Information Processing 
Systems","volume":"29","author":"Ho","year":"2016"},{"key":"ref17","author":"Waugh","year":"2013","journal-title":"Computational Rationalization: The Inverse Equilibrium Problem"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1177\/02783649211041652"},{"key":"ref19","article-title":"Maximum Entropy Inverse Reinforcement Learning","volume-title":"AAAI Conference on Artificial Intelligence","author":"Ziebart"},{"key":"ref20","article-title":"Maximum Entropy RL (Provably) Solves Some Robust RL Problems","volume-title":"International Conference on Learning Representations","author":"Eysenbach"},{"key":"ref21","first-page":"4028","article-title":"IQ-Learn: Inverse Soft-Q Learning for Imitation","volume-title":"Neural Information Processing Systems","volume":"34","author":"Garg","year":"2021"},{"key":"ref22","article-title":"Better-Than-Demonstrator Imitation Learning via Automatically-Ranked Demonstrations","volume-title":"Conference on Robot Learning","author":"Brown"},{"key":"ref23","first-page":"627","article-title":"A Reduction of Imitation Learning and Structured Prediction to No-Regret Online Learning","volume-title":"International Conference on Artificial Intelligence and Statistics. 
JMLR Workshop and Conference Proceedings","author":"Ross"},{"key":"ref24","first-page":"55681","article-title":"Roboclip: One demonstration is enough to learn robot policies","volume":"36","author":"Sontakke","year":"2023","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CDC45484.2021.9683740"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2021.XVII.030"},{"key":"ref27","article-title":"On the Critical Role of Conventions in Adaptive Human-AI Collaboration","volume-title":"International Conference on Learning Representations","author":"Shih"},{"key":"ref28","first-page":"4399","article-title":"\u201cOther-Play\u201d for Zero-Shot Coordination","volume-title":"International Conference on Machine Learning. PMLR","author":"Hu"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-60990-0_12"},{"key":"ref30","author":"Kalogiannis","year":"2022","journal-title":"Efficiently Computing Nash Equilibria in Adversarial Team Markov Games"},{"key":"ref31","author":"Haarnoja","year":"2018","journal-title":"Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning With a Stochastic Actor"},{"key":"ref32","author":"Knott","year":"2021","journal-title":"Evaluating the Robustness of Collaborative Agents"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.3233\/faia230486"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i5.25758"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/tits.2023.3285442"},{"key":"ref36","author":"Chen","year":"2020","journal-title":"Delay-Aware Multi-Agent Reinforcement Learning for Cooperative and Competitive Environments"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58583-9_28"},{"key":"ref38","author":"Leurent","year":"2019","journal-title":"Social Attention for Autonomous Decision-Making in Dense 
Traffic"},{"key":"ref39","author":"Jeon","year":"2020","journal-title":"Scalable Multi-Agent Inverse Reinforcement Learning via Actor-Attention-Critic"},{"key":"ref40","article-title":"The Boltzmann Policy Distribution: Accounting for Systematic Suboptimality in Human Models","volume-title":"International Conference on Learning Representations","author":"Laidlaw"},{"key":"ref41","first-page":"43","article-title":"When Humans Aren\u2019t Optimal: Robots That Collaborate With Risk-Aware Humans","volume-title":"ACM\/IEEE International Conference on Human-Robot Interaction","author":"Kwon"},{"key":"ref42","author":"Chan","year":"2021","journal-title":"Human Irrationality: Both Bad and Good for Reward Inference"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","location":"Atlanta, GA, USA","start":{"date-parts":[[2025,5,19]]},"end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11128370.pdf?arnumber=11128370","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T06:46:24Z","timestamp":1756881984000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11128370\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11128370","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}