{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:28:34Z","timestamp":1763191714139,"version":"3.45.0"},"reference-count":37,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010097","name":"China Association for Science and Technology","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100010097","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1109\/ijcnn64981.2025.11228013","type":"proceedings-article","created":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T18:46:15Z","timestamp":1763145975000},"page":"1-10","source":"Crossref","is-referenced-by-count":0,"title":["Query Rephrasing for Context Independence in Scene Knowledge-guided Visual Grounding"],"prefix":"10.1109","author":[{"given":"Xilong","family":"Qin","sequence":"first","affiliation":[{"name":"College of Systems Engineering National University of Defense Technology,Changsha,China"}]},{"given":"Jing","family":"Ye","sequence":"additional","affiliation":[{"name":"Hunan Institute of Advanced Technology,Changsha,China"}]},{"given":"Haixiang","family":"Zhu","sequence":"additional","affiliation":[{"name":"Hunan Institute of Advanced Technology,Changsha,China"}]},{"given":"Wansen","family":"Wu","sequence":"additional","affiliation":[{"name":"Navy Submarine Academy,Qingdao,China"}]},{"given":"Yue","family":"Hu","sequence":"additional","affiliation":[{"name":"College of Systems Engineering National University of Defense Technology,Changsha,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01444"},{"key":"ref2","article-title":"Chatglm: A family of large language models from GLM-130B to GLM-4 all tools","volume-title":"CoRR","volume":"abs\/2406.12793","author":"Zeng","year":"2024"},{"key":"ref3","first-page":"2591","article-title":"Taking a hint: Leveraging explanations to make vision and language models more grounded","volume-title":"2019 IEEE\/CVF International Conference on Computer Vision (ICCV)","author":"Selvaraju"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2018.2797921"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.470"},{"key":"ref6","first-page":"684","article-title":"Learning to compose and reason with language tree structures for visual grounding","volume-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence","volume":"44","author":"Hong","year":"2019"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00142"},{"key":"ref8","first-page":"1960","article-title":"Neighbourhood watch: Referring expression comprehension via language-guided graph attention networks","volume-title":"2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Wang"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00205"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00477"},{"key":"ref11","first-page":"4280","article-title":"G3raphground: Graph-based language grounding","volume-title":"2019 IEEE\/CVF International Conference on Computer Vision (ICCV)","author":"Bajaj"},{"article-title":"Very deep convolutional networks for large-scale image recognition","volume-title":"3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings","author":"Simonyan","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01661"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01005"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00478"},{"key":"ref17","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-030-58568-6_23","article-title":"Improving one-stage visual grounding by recursive sub-query construction","volume-title":"European Conference on Computer Vision","author":"Yang"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2021.3090426"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00479"},{"key":"ref20","first-page":"1749","article-title":"Transvg: End-to-end visual grounding with transformers","volume-title":"2021 IEEE\/CVF International Conference on Computer Vision (ICCV)","author":"Deng"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.02254"},{"key":"ref22","article-title":"Ofa: Unifying architectures, tasks, and modalities through a simple sequence-to-sequence learning framework","author":"Wang","year":"2022","journal-title":"CoRR"},{"key":"ref23","article-title":"Align before fuse: Vision and language representation learning with momentum distillation","volume-title":"Neural Information Processing Systems","author":"Li","year":"2021"},{"key":"ref24","doi-asserted-by":"crossref","DOI":"10.1145\/3447548.3467206","article-title":"M6: Multi-modality-to-multi-modality multitask mega-transformer for unified pretraining","volume-title":"Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery & Data Mining","author":"Lin"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20059-5_30"},{"article-title":"Elevater: A benchmark and toolkit for evaluating language-augmented visual models","year":"2022","author":"Li","key":"ref26"},{"article-title":"Glipv2: Unifying localization and vision-language understanding","year":"2022","author":"Zhang","key":"ref27"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01069"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00180"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3321501"},{"key":"ref31","article-title":"Attention is all you need","volume-title":"Neural Information Processing Systems","author":"Vaswani","year":"2017"},{"key":"ref32","article-title":"BERT is not a knowledge base (yet): Factual knowledge vs. name-based reasoning in unsupervised QA","volume-title":"CoRR","volume":"abs\/1911.03681","author":"P\u00f6rner","year":"2019"},{"key":"ref33","first-page":"770","article-title":"Deep residual learning for image recognition","volume-title":"2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","author":"He"},{"key":"ref34","article-title":"Gpt-4o system card","volume-title":"CoRR","volume":"abs\/2410.21276","author":"Hurst","year":"2024"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.02484"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/d14-1086"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01286"}],"event":{"name":"2025 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2025,6,30]]},"location":"Rome, Italy","end":{"date-parts":[[2025,7,5]]}},"container-title":["2025 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11227166\/11227148\/11228013.pdf?arnumber=11228013","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:26:31Z","timestamp":1763191591000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11228013\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/ijcnn64981.2025.11228013","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]}}}