{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T18:14:26Z","timestamp":1764785666980,"version":"build-2065373602"},"reference-count":27,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["N000142212474"],"award-info":[{"award-number":["N000142212474"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000183","name":"Army Research Office","doi-asserted-by":"publisher","award":["W911NF2110103","W911NF2310363"],"award-info":[{"award-number":["W911NF2110103","W911NF2310363"]}],"id":[{"id":"10.13039\/100000183","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Control Syst. Lett."],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/lcsys.2025.3591199","type":"journal-article","created":{"date-parts":[[2025,7,21]],"date-time":"2025-07-21T18:10:39Z","timestamp":1753121439000},"page":"2315-2320","source":"Crossref","is-referenced-by-count":1,"title":["Robust Human-Machine Teaming Through Reinforcement Learning From Failure via Sparse Reward Densification"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-2960-0787","authenticated-orcid":false,"given":"Mingkang","family":"Wu","sequence":"first","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Texas at San Antonio, San Antonio, TX, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3383-0185","authenticated-orcid":false,"given":"Yongcan","family":"Cao","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Texas at San Antonio, San Antonio, TX, USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3422622"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-59497-3_175"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref5","first-page":"4572","article-title":"Generative adversarial imitation learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ho"},{"key":"ref6","first-page":"2469","article-title":"Policy optimization with demonstrations","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kang"},{"key":"ref7","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"arXiv:1509.02971"},{"key":"ref8","first-page":"23803","article-title":"Cross-entropy loss functions: Theoretical analysis and applications","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Mao"},{"key":"ref9","article-title":"Playing Atari with deep reinforcement learning","author":"Mnih","year":"2013","journal-title":"arXiv:1312.5602"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref11","article-title":"Multi-goal reinforcement learning: Challenging robotics environments and request for research","author":"Plappert","year":"2018","journal-title":"arXiv:1802.09464"},{"key":"ref12","first-page":"12","volume-title":"AI, OR and Control Theory: A Rosetta Stone for Stochastic Optimization","author":"Powell","year":"2012"},{"key":"ref13","first-page":"1040","article-title":"Learning from demonstration","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Schaal"},{"key":"ref14","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schulman"},{"key":"ref15","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref16","first-page":"1060","article-title":"Inverse reinforcement learning from failure","volume-title":"Proc. Int. Conf. Auton. Agents Multiagent Syst.","author":"Shiarlis"},{"key":"ref17","first-page":"447","article-title":"Exploration from demonstration for interactive reinforcement learning","volume-title":"Proc. Int. Conf. Auton. Agents Multiagent Syst.","author":"Subramanian"},{"key":"ref18","first-page":"1449","article-title":"A game-theoretic approach to apprenticeship learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Syed"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/687"},{"key":"ref20","article-title":"Gymnasium: A standard interface for reinforcement learning environments","author":"Towers","year":"2024","journal-title":"arXiv:2407.17032"},{"key":"ref21","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","author":"Vecerik","year":"2017","journal-title":"arXiv:1707.08817"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i9.28886"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICMI60790.2024.10585936"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.2514\/6.2023-0505"},{"key":"ref25","article-title":"RbRL2. 0: Integrated reward and policy learning for rating-based reinforcement learning","author":"Wu","year":"2025","journal-title":"arXiv:2501.07502"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8968063"},{"volume-title":"Modeling Purposeful Adaptive Behavior with the Principle of Maximum Causal Entropy","year":"2010","author":"Ziebart","key":"ref27"}],"container-title":["IEEE Control Systems Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7782633\/10939047\/11087555.pdf?arnumber=11087555","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T04:35:55Z","timestamp":1760675755000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11087555\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/lcsys.2025.3591199","relation":{},"ISSN":["2475-1456"],"issn-type":[{"type":"electronic","value":"2475-1456"}],"subject":[],"published":{"date-parts":[[2025]]}}}