{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T16:12:38Z","timestamp":1770739958113,"version":"3.49.0"},"reference-count":71,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"name":"Key Research Project of Anhui Provincial Natural Science","award":["2023AH052396"],"award-info":[{"award-number":["2023AH052396"]}]},{"name":"General Program for the Training of Outstanding Young Teachers of Anhui Province","award":["YQYB2024126"],"award-info":[{"award-number":["YQYB2024126"]}]},{"name":"General Program for the Training of Outstanding Young Teachers of Anhui Province","award":["2025AHGXZK30721"],"award-info":[{"award-number":["2025AHGXZK30721"]}]},{"name":"Educational Teaching Research Planning Project of Anhui Vocational and Adult Education Society","award":["AZCJ2024287"],"award-info":[{"award-number":["AZCJ2024287"]}]},{"name":"Educational Teaching Research Planning Project of Anhui Vocational and Adult Education Society","award":["AZCJ2024290"],"award-info":[{"award-number":["AZCJ2024290"]}]},{"name":"Key Natural Science Research Project of Wuhu Institute of Technology","award":["wzyzrzd202312"],"award-info":[{"award-number":["wzyzrzd202312"]}]},{"name":"Key Natural Science Research Project of Wuhu Institute of Technology","award":["wzyzrzd202409"],"award-info":[{"award-number":["wzyzrzd202409"]}]},{"name":"\u201cTalent Project\u201d Outstanding Young Top-Notch Talent Project of Wuhu Institute of Technology","award":["rc2023qnbj03"],"award-info":[{"award-number":["rc2023qnbj03"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2026]]},"DOI":"10.1109\/access.2026.3658373","type":"journal-article","created":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T21:00:22Z","timestamp":1769634022000},"page":"15928-15944","source":"Crossref","is-referenced-by-count":0,"title":["Learning What They Pretend to Think: Adversarial ToM for Safety-Critical Driving Policies"],"prefix":"10.1109","volume":"14","author":[{"given":"Houhuang","family":"Bi","sequence":"first","affiliation":[{"name":"Wuhu Vocational Technical University, Wuhu, Anhui, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-8549-4098","authenticated-orcid":false,"given":"Jun","family":"Huang","sequence":"additional","affiliation":[{"name":"Wuhu Vocational Technical University, Wuhu, Anhui, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3688-6296","authenticated-orcid":false,"given":"Yichen","family":"Han","sequence":"additional","affiliation":[{"name":"South China Normal University, Guangzhou, Guangdong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zeng","family":"Zhang","sequence":"additional","affiliation":[{"name":"South China Normal University, Guangzhou, Guangdong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1002\/rob.21918"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460487"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2016.XII.029"},{"key":"ref4","first-page":"825","article-title":"Belief state planning for autonomous driving in urban environments","volume-title":"Proc. IEEE Intell. Vehicles Symp. (IV)","author":"Hubmann"},{"key":"ref5","article-title":"Human-centric reward optimization for reinforcement learning-based automated driving using large language models","author":"Zhou","year":"2024","journal-title":"arXiv:2405. 04135"},{"key":"ref6","article-title":"CMAT: A multi-agent collaboration tuning framework for enhancing small language models","author":"Liang","year":"2024","journal-title":"arXiv:2404.01663"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ISCIPT67144.2025.11265471"},{"key":"ref8","article-title":"FALCON: Feedback-driven adaptive long\/short-term memory reinforced coding optimization system","author":"Li","year":"2024","journal-title":"arXiv:2410.21349"},{"key":"ref9","article-title":"Contextual bandits for unbounded context distributions","author":"Zhao","year":"2024","journal-title":"arXiv:2408.09655"},{"key":"ref10","article-title":"Theory of mind as intrinsic motivation for multi-agent reinforcement learning","author":"Oguntola","year":"2023","journal-title":"arXiv:2307.01158"},{"key":"ref11","article-title":"WcDT: World-centric diffusion transformer for traffic scene generation","author":"Yang","year":"2024","journal-title":"arXiv:2404.02082"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794007"},{"key":"ref13","article-title":"ReAgent-V: A reward-driven multi-agent framework for video understanding","author":"Zhou","year":"2025","journal-title":"arXiv:2506.01300"},{"key":"ref14","article-title":"ToM2C: Target-oriented multi-agent communication with theory of mind","author":"Wang","year":"2021","journal-title":"arXiv:2111.09189"},{"key":"ref15","article-title":"SCORE: Story coherence and retrieval enhancement for AI narratives","author":"Yi","year":"2025","journal-title":"arXiv:2503.23512"},{"key":"ref16","first-page":"2295","article-title":"Social navigation planning under uncertainty in mixed traffic environments","volume-title":"Proc. IEEE Int. Conf. Robot. Autom. (ICRA)","author":"Toghi"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-97-9003-6_3"},{"key":"ref18","first-page":"1100","article-title":"Intent-aware adversarial reinforcement learning for safe decision making","volume-title":"Proc. Int. Conf. Auto. Agents MultiAgent Syst.","author":"Oguntola"},{"key":"ref19","article-title":"Self-evolving agents with reflective and memory-augmented abilities","author":"Liang","year":"2024","journal-title":"arXiv:2409.00872"},{"key":"ref20","first-page":"1","article-title":"Belief falsification and recovery in multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Liang"},{"key":"ref21","article-title":"Resurrect mask AutoRegressive modeling for efficient and scalable image generation","author":"Xin","year":"2025","journal-title":"arXiv:2507.13032"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.emnlp-main.1415"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/WACV61041.2025.00595"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2025.129572"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i14.29540"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-30678-5_7"},{"key":"ref27","article-title":"Lumina-image 2.0: A unified and efficient image generative framework","author":"Qin","year":"2025","journal-title":"arXiv:2503.21758"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.52202\/079017-1757"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3746027.3755141"},{"key":"ref30","article-title":"ReGraP-LLaVA: Reasoning enabled graph-based personalized large language and vision assistant","author":"Xiang","year":"2025","journal-title":"arXiv:2505.03654"},{"key":"ref31","article-title":"See the forest and the trees: A synergistic reasoning framework for knowledge-based visual question answering","author":"Wang","year":"2025","journal-title":"arXiv:2507.17659"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/WACV61041.2025.00604"},{"key":"ref33","article-title":"Enhancing low-cost video editing with lightweight adaptors and temporal-aware inversion","author":"He","year":"2025","journal-title":"arXiv:2501.04606"},{"key":"ref34","first-page":"3040","article-title":"Social influence as intrinsic motivation for multi-agent deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jaques"},{"key":"ref35","first-page":"1135","article-title":"Socially aware motion prediction with neural attention fields","volume-title":"Proc. Conf. Robot Learn.","author":"Li"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.177\/0278364920917446"},{"key":"ref37","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2025.130476","article-title":"AgentBuilder: Automating agent creation via large language model-driven systems","volume":"646","author":"Tang","year":"2025","journal-title":"Neurocomputing"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2022.3195870"},{"key":"ref39","first-page":"2817","article-title":"Robust adversarial reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Pinto"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8206245"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CDC57313.2025.11312575"},{"issue":"1","key":"ref42","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"Garc\u00eda","year":"2015","journal-title":"J. Mach. Learn. Res."},{"key":"ref43","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume-title":"Proc. 16th Int. Conf. Mach. Learn. (ICML)","author":"Ng"},{"key":"ref44","volume-title":"Constrained Markov Decision Processes","author":"Altman","year":"1999"},{"key":"ref45","first-page":"1863","article-title":"Lyapunov-based safe policy optimization for continuous control","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Chow"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2020.3009923"},{"key":"ref47","first-page":"2144","article-title":"Learning to communicate with deep multi-agent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Foerster"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-025-07389-2"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CISCE65916.2025.11065245"},{"key":"ref50","article-title":"QFFT, question-free fine-tuning for adaptive reasoning","author":"Liu","year":"2025","journal-title":"arXiv:2506.12860"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVIDL65390.2025.11085477"},{"key":"ref52","first-page":"2970","article-title":"Implicit coordination in multi-agent reinforcement learning via latent interaction graphs","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Everett"},{"key":"ref53","volume-title":"Theory of Games and Economic Behavior","author":"von Neumann","year":"1944"},{"key":"ref54","article-title":"Adversarial attacks on neural network policies","author":"Huang","year":"2017","journal-title":"arXiv:1702.02284"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i12.26753"},{"key":"ref56","volume-title":"An Environment for Autonomous Driving Decision-Making","author":"Leurent","year":"2018"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-060117-105157"},{"key":"ref58","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref59","first-page":"4218","article-title":"Machine theory of mind","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Rabinowitz"},{"key":"ref60","first-page":"4777","article-title":"Learning robust reward functions from adversarial human demonstrations","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Li"},{"key":"ref61","first-page":"8853","article-title":"Cooperation and competition among autonomous vehicles at intersections","volume-title":"Proc. IEEE Int. Conf. Robot. Autom. (ICRA)","author":"Bouton"},{"key":"ref62","first-page":"802","article-title":"Flow: Architecture and benchmarking for reinforcement learning in traffic control","volume-title":"Proc. Conf. Robot Learn. (CoRL)","author":"Wu"},{"key":"ref63","first-page":"3041","article-title":"Non-conservatively defensive strategy for autonomous vehicles in highway scenarios","volume-title":"Proc. IEEE\/RSJ Int. Conf. Intell. Robots Syst. (IROS)","author":"Zhan"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/TCST.2017.2723574"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/510"},{"key":"ref66","article-title":"Autonomous vehicles in 5G and beyond: A survey","volume":"39","author":"Hakak","year":"2023","journal-title":"Veh. Commun."},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ojits.2022.3181510"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2022.3150580"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/MCOMSTD.001.1900053"},{"key":"ref70","article-title":"Hierarchical game-theoretic planning for autonomous vehicles","author":"Fisac","year":"2018","journal-title":"arXiv:1810.05766"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/TSUSC.2018.2880127"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/11323511\/11366238.pdf?arnumber=11366238","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,9]],"date-time":"2026-02-09T21:08:09Z","timestamp":1770671289000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11366238\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":71,"URL":"https:\/\/doi.org\/10.1109\/access.2026.3658373","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]}}}