{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T17:46:27Z","timestamp":1771955187943,"version":"3.50.1"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10611605","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"9517-9523","source":"Crossref","is-referenced-by-count":3,"title":["Shaping Social Robot to Play Games with Human Demonstrations and Evaluative Feedback"],"prefix":"10.1109","author":[{"given":"Chuanxiong","family":"Zheng","sequence":"first","affiliation":[{"name":"Ocean University of China,School of Information Science and Engineering"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Ocean University of China,School of Information Science and Engineering"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hui","family":"Wang","sequence":"additional","affiliation":[{"name":"Ocean University of China,School of Information Science and Engineering"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Randy","family":"Gomez","sequence":"additional","affiliation":[{"name":"Honda Research Institute Japan Co., Ltd,Wako,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eric","family":"Nichols","sequence":"additional","affiliation":[{"name":"Honda Research Institute Japan Co., Ltd,Wako,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guangliang","family":"Li","sequence":"additional","affiliation":[{"name":"Ocean University of China,School of Information Science and Engineering"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/S0921-8890(02)00373-1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ROMAN.2016.7745172"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.9914"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2020.XVI.103"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/THMS.2019.2912447"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3006254"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1126\/science.adh8135"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1994.6.2.215"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1126\/science.aay2400"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03051-4"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1126\/science.361.6403.632"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2008.10.024"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-100819-063206"},{"key":"ref17","first-page":"5","article-title":"Combining manual feedback with subsequent mdp reward signals for reinforcement learning","volume-title":"Proceedings of Joint Conference on Autonomous Agents and Multi-Agent Systems (AAMAS)","author":"Knox"},{"key":"ref18","first-page":"661","article-title":"Efficient reductions for imitation learning","volume-title":"Proceedings of the 13th International Conference on Artificial Intelligence and Statistics","author":"Ross"},{"key":"ref19","article-title":"Learning from demonstration to be a good team member in a role playing game","volume-title":"Proceedings of the 26th International FLAIRS Conference","author":"Silva"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.049"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref22","first-page":"2","article-title":"Algorithms for inverse reinforcement learning","volume-title":"Proceedings of International Conference on Machine Learning (ICML)","volume":"1","author":"Ng"},{"key":"ref23","first-page":"2760","article-title":"Model-free imitation learning with policy optimization","volume-title":"Proceedings of International Conference on Machine Learning (ICML)","author":"Ho"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11485"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ROMAN.2018.8525837"},{"key":"ref26","first-page":"8011","article-title":"Reward learning from human preferences and demonstrations in atari","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"Ibarz"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160939"},{"key":"ref28","article-title":"Generative adversarial imitation learning","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"Ho"},{"key":"ref29","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"ref30","first-page":"233","article-title":"Haru: hardware design of an experimental tabletop robot assistant","volume-title":"Proceedings of the 2018 ACM\/IEEE International Conference on Human-Robot Interaction (HRI)","author":"Gomez"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/RO-MAN50785.2021.9515525"},{"key":"ref32","article-title":"Model-based reinforcement learning for atari","author":"Kaiser","year":"2019"},{"key":"ref33","first-page":"15032","article-title":"Pettingzoo: Gym for multi-agent reinforcement learning","volume":"34","author":"Terry","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/s12369-008-0001-3"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3415167"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","location":"Yokohama, Japan","start":{"date-parts":[[2024,5,13]]},"end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10611605.pdf?arnumber=10611605","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,11]],"date-time":"2024-08-11T04:19:30Z","timestamp":1723349970000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10611605\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10611605","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}