{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T13:58:08Z","timestamp":1769263088568,"version":"3.49.0"},"reference-count":71,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012245","name":"Science and Technology Planning Project of Guangdong Province","doi-asserted-by":"publisher","award":["2020B0101100002"],"award-info":[{"award-number":["2020B0101100002"]}],"id":[{"id":"10.13039\/501100012245","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62076100"],"award-info":[{"award-number":["62076100"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62476097"],"award-info":[{"award-number":["62476097"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62276072"],"award-info":[{"award-number":["62276072"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["x2rjD2240100"],"award-info":[{"award-number":["x2rjD2240100"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Guangxi Natural Science Foundation Key Project","award":["2025GXNSFDA069017"],"award-info":[{"award-number":["2025GXNSFDA069017"]}]},{"name":"Guangdong Provincial Fund for Basic and Applied Basic Research-Regional Joint Fund","award":["2023B1515120078"],"award-info":[{"award-number":["2023B1515120078"]}]},{"name":"Guangdong Provincial Natural Science Foundation for Outstanding Youth Team Project","award":["2024B1515040010"],"award-info":[{"award-number":["2024B1515040010"]}]},{"DOI":"10.13039\/501100000272","name":"National Institute for Health Research","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000272","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Biomedical Research Centre at South London"},{"name":"Maudsley NHS Foundation Trust"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Multimedia"],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/tmm.2025.3543062","type":"journal-article","created":{"date-parts":[[2025,3,5]],"date-time":"2025-03-05T13:51:41Z","timestamp":1741182701000},"page":"4844-4855","source":"Crossref","is-referenced-by-count":1,"title":["Error-Aware Generative Reasoning for Zero-Shot Visual Grounding"],"prefix":"10.1109","volume":"27","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2158-8750","authenticated-orcid":false,"given":"Yuqi","family":"Bu","sequence":"first","affiliation":[{"name":"School of Software Engineering, South China University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0207-0278","authenticated-orcid":false,"given":"Xin","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Software Engineering, South China University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1767-789X","authenticated-orcid":false,"given":"Yi","family":"Cai","sequence":"additional","affiliation":[{"name":"School of Software Engineering, South China University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6410-615X","authenticated-orcid":false,"given":"Qiong","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Software Engineering, South China University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0437-0557","authenticated-orcid":false,"given":"Tao","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Biostatistics and Health Informatics, Institute of Psychiatry, Psychology and Neuroscience, King&#x0027;s College London, London, U.K."}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7691-347X","authenticated-orcid":false,"given":"Qingbao","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Guangxi University, Nanning, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.3042066"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197068"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811895"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01000"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00308"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475629"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00180"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3219642"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01268"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3191696"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3183326"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2861573"},{"key":"ref14","first-page":"462","article-title":"Generating training data with language models: Towards zero-shot language understanding","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Meng","year":"2022"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109480"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.357"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.aiopen.2024.01.004"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01507"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01436"},{"key":"ref20","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Wei","year":"2022"},{"key":"ref21","first-page":"10764","article-title":"PAL: Program-aided language models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Gao","year":"2023"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.85"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.ijcnlp-main.45"},{"key":"ref24","first-page":"9118","article-title":"Language models as zero-shot planners: Extracting actionable knowledge for embodied agents","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Huang","year":"2022"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01005"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3205404"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3191841"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2018.2811621"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01661"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00179"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00928"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.557"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1002\/widm.1379"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1111\/coin.12410"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3161832"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6895"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00404"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3132068"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1909.11065"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413593"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3297312"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00556"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3058614"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i3.20215"},{"key":"ref45","article-title":"MM-REACT: Prompting chatGPT for multimodal reasoning and action","author":"Yang","year":"2023"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01092"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01069"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1093\/nc\/niab025"},{"key":"ref49","first-page":"19730","article-title":"BLIP-2: Bootstrapping language-image pre-training with frozen image encoders and large language models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Li","year":"2023"},{"key":"ref50","first-page":"25994","article-title":"Multi-grained vision language pre-training: Aligning texts with visual concepts","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zeng","year":"2022"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3019967"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46475-6_5"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.9"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00589"},{"key":"ref56","first-page":"2835","article-title":"Emergent abilities of large language models","author":"Wei","year":"2022","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3387941"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00553"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00742"},{"key":"ref63","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford","year":"2021"},{"key":"ref64","first-page":"240:1","article-title":"PALM: Scaling language modeling with pathways","volume":"24","author":"Chowdhery","year":"2023","journal-title":"J. Mach. Learn. Res."},{"key":"ref65","article-title":"GPT-4 technical report","year":"2023"},{"key":"ref66","first-page":"21558","article-title":"Is your code generated by chatGPT really correct? Rigorous evaluation of large language models for code generation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Liu","year":"2023"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1145\/3672459"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2022.104182"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01864"},{"key":"ref71","article-title":"AutoGen: Enabling next-gen LLM applications via multi-agent conversations framework","author":"Wu","year":"2024"}],"container-title":["IEEE Transactions on Multimedia"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6046\/10844992\/10912743.pdf?arnumber=10912743","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T18:15:31Z","timestamp":1754072131000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10912743\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":71,"URL":"https:\/\/doi.org\/10.1109\/tmm.2025.3543062","relation":{},"ISSN":["1520-9210","1941-0077"],"issn-type":[{"value":"1520-9210","type":"print"},{"value":"1941-0077","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]}}}