{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:57:00Z","timestamp":1781539020230,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"Natural Science Research Project of Anhui Provincial Department of Education","award":["2024AH050451"],"award-info":[{"award-number":["2024AH050451"]}]},{"name":"Open Project of Anhui Provincial Key Laboratory of Smart Distribution Grid Operation and Control","award":["SYS-KFKT-202511"],"award-info":[{"award-number":["SYS-KFKT-202511"]}]},{"name":"Anhui Provincial Natural Science Foundation Hefei Municipal Natural","award":["2508085QC098"],"award-info":[{"award-number":["2508085QC098"]}]},{"name":"key projects of natural science research projects in Anhui-universities","award":["2024AH050462"],"award-info":[{"award-number":["2024AH050462"]}]},{"name":"Hefei Municipal Natural ScienceFoundation","award":["HZR2551"],"award-info":[{"award-number":["HZR2551"]}]},{"name":"The Natural Science Foundation of Anhui Province","award":["2508085QF250"],"award-info":[{"award-number":["2508085QF250"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810821","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"138-146","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["GjPest4CMR: LLMs-Assisted Morphology-Aware Fine-grained Goji Pest Image-Text Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-1996-5066","authenticated-orcid":false,"given":"Wenkai","family":"Ma","sequence":"first","affiliation":[{"name":"Anhui Agricultural University, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3059-6629","authenticated-orcid":false,"given":"Wentao","family":"Ma","sequence":"additional","affiliation":[{"name":"Anhui Agricultural University, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0578-2078","authenticated-orcid":false,"given":"Tan","family":"Wang","sequence":"additional","affiliation":[{"name":"Anhui Agricultural University, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-7040-0527","authenticated-orcid":false,"given":"Yuwei","family":"Wang","sequence":"additional","affiliation":[{"name":"Anhui Agricultural University, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2055-2619","authenticated-orcid":false,"given":"Lu","family":"Liu","sequence":"additional","affiliation":[{"name":"Anhui Agricultural University, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-4944-4231","authenticated-orcid":false,"given":"Junfei","family":"Yang","sequence":"additional","affiliation":[{"name":"Anhui Agricultural University, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6607-3942","authenticated-orcid":false,"given":"Xianbao","family":"Xu","sequence":"additional","affiliation":[{"name":"Anhui Agricultural University, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4386-2490","authenticated-orcid":false,"given":"Yuan","family":"Rao","sequence":"additional","affiliation":[{"name":"Anhui Agricultural University, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Chen Guangyi","year":"2023","unstructured":"Guangyi Chen, Weiran Yao, Xiangchen Song, Xinyue Li, Yongming Rao, and Kun Zhang. 2023. PLOT: Prompt learning with optimal transport for vision-language models. In Proceedings of the International Conference on Learning Representations."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","unstructured":"Junyu Chen Yihua Gao Mingyuan Ge and Mingyong Li. 2025. Ambiguity-aware and high-order relation learning for multi-grained image-text matching. Knowledge-Based Systems 316 (2025) 113355. 10.1016\/j.knosys.2025.113355","DOI":"10.1016\/j.knosys.2025.113355"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","unstructured":"Shaofan Chen Guoyuan He Wentao Ma Hao Zhang Tongqing Zhou Siwei Wang and Lichuan Gu. 2026. Causal Direction Discovery via Related Conditional Residual. Pattern Recognition 176 (2026) 113196. 10.1016\/j.patcog.2026.113196","DOI":"10.1016\/j.patcog.2026.113196"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"e_1_3_3_1_6_2","unstructured":"Daniel Csizmadia Andrei Codreanu Victor Sim Vighnesh Prabhu Michael Lu Kevin Zhu Sean O\u2019Brien and Vasu Sharma. 2025. Distill CLIP: Enhancing image-text retrieval via cross-modal transformer distillation and masked language modeling. arxiv:https:\/\/arXiv.org\/abs\/2505.21549"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","unstructured":"Guowei Dai Jingchao Fan and Christine Dewi. 2023. ITF-WPI: Image and text based cross-modal feature fusion model for wolfberry pest recognition. Computers and Electronics in Agriculture 212 (Sept. 2023) 108129. 10.1016\/j.compag.2023.108129","DOI":"10.1016\/j.compag.2023.108129"},{"key":"e_1_3_3_1_8_2","volume-title":"Proceedings of the British Machine Vision Conference","author":"Faghri Fartash","year":"2018","unstructured":"Fartash Faghri, David\u00a0J. Fleet, Jamie\u00a0Ryan Kiros, and Sanja Fidler. 2018. VSE++: Improving visual-semantic embeddings with hard negatives. In Proceedings of the British Machine Vision Conference."},{"key":"e_1_3_3_1_9_2","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Huang Tengjun","year":"2024","unstructured":"Tengjun Huang. 2024. Efficient remote sensing with harmonized transfer learning and modality alignment. In Proceedings of the International Conference on Learning Representations."},{"key":"e_1_3_3_1_10_2","first-page":"2787","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Jiang Ding","year":"2023","unstructured":"Ding Jiang and Mang Ye. 2023. Cross-modal implicit relation reasoning and alignment for text-based person re-identification. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2787\u20132797."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01369"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_13"},{"key":"e_1_3_3_1_13_2","unstructured":"Hao Li Jiayang Gu Jingkuan Song An Zhang and Lianli Gao. 2024. One-step Noisy Label Mitigation. arxiv:https:\/\/arXiv.org\/abs\/2410.01944\u00a0[cs.CV]"},{"key":"e_1_3_3_1_14_2","volume-title":"Proceedings of the 39th International Conference on Machine Learning","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven C.\u00a0H. Hoi. 2022. BLIP: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In Proceedings of the 39th International Conference on Machine Learning."},{"key":"e_1_3_3_1_15_2","volume-title":"Proceedings of the Advances in Neural Information Processing Systems","author":"Li Junnan","year":"2021","unstructured":"Junnan Li, Ramprasaath\u00a0R. Selvaraju, Akhilesh\u00a0Deepak Gotmare, Shafiq Joty, Caiming Xiong, and Steven C.\u00a0H. Hoi. 2021. Align before fuse: Vision and language representation learning with momentum distillation. In Proceedings of the Advances in Neural Information Processing Systems."},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612244"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","unstructured":"Wenyong Li Dujin Wang Ming Li Yulin Gao Jianwei Wu and Xinting Yang. 2021. Field detection of tiny pests from sticky trap images using deep learning in agricultural greenhouse. Computers and Electronics in Agriculture 183 (April 2021) 106048. 10.1016\/j.compag.2021.106048","DOI":"10.1016\/j.compag.2021.106048"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","unstructured":"Wenyong Li Tengfei Zheng Zhankui Yang Ming Li Chuanheng Sun and Xinting Yang. 2021. Classification and detection of insects from field images using deep learning for smart pest management: A systematic review. Ecological Informatics 66 (December 2021) 101460. 10.1016\/j.ecoinf.2021.101460","DOI":"10.1016\/j.ecoinf.2021.101460"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01093"},{"key":"e_1_3_3_1_20_2","unstructured":"Haotian Liu Chunyuan Li Qingyang Wu and Yong\u00a0Jae Lee. 2023. Visual instruction tuning. arxiv:https:\/\/arXiv.org\/abs\/2304.08485"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","unstructured":"Liu Liu Rujing Wang Chengjun Xie Po Yang Fangyuan Wang Sud Sudirman and Wancai Liu. 2019. PestNet: An end-to-end deep learning approach for large-scale multi-class pest detection and classification. IEEE Access 7 (2019) 45301\u201345312. 10.1109\/ACCESS.2019.2909522","DOI":"10.1109\/ACCESS.2019.2909522"},{"key":"e_1_3_3_1_22_2","unstructured":"Yating Liu Zimo Liu Xiangyuan Lan Wenming Yang Yaowei Li and Qingmin Liao. 2025. DM-Adapter: Domain-aware mixture-of-adapters for text-based person retrieval. arxiv:https:\/\/arXiv.org\/abs\/2503.04144"},{"key":"e_1_3_3_1_23_2","volume-title":"Proceedings of the Advances in Neural Information Processing Systems","author":"Lu Jiasen","year":"2019","unstructured":"Jiasen Lu, Dhruv Batra, Devi Parikh, and Stefan Lee. 2019. ViLBERT: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. In Proceedings of the Advances in Neural Information Processing Systems."},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"crossref","unstructured":"Wentao Ma Qingchao Chen Tongqing Zhou Shan Zhao and Zhiping Cai. 2023. Using multimodal contrastive knowledge distillation for video-text retrieval. IEEE Transactions on Circuits and Systems for Video Technology 33 10 (2023) 5486\u20135497.","DOI":"10.1109\/TCSVT.2023.3257193"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","unstructured":"Wentao Ma Xiaoqian Wu Shaofan Chen Wei Liu Shan Zhao Qian Wan and Lichuan Gu. 2026. TSGR2: Image-text matching via triple-level scene graph relation reasoning. Applied Soft Computing 187 (2026) 114323. 10.1016\/j.asoc.2025.114323","DOI":"10.1016\/j.asoc.2025.114323"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","unstructured":"Wentao Ma Xinyi Wu Shan Zhao Tongqing Zhou Dan Guo Lichuan Gu Zhiping Cai and Meng Wang. 2024. FedSH: Towards privacy-preserving text-based person re-identification. IEEE Transactions on Multimedia 26 (2024) 5065\u20135077. 10.1109\/TMM.2023.3330091","DOI":"10.1109\/TMM.2023.3330091"},{"key":"e_1_3_3_1_27_2","unstructured":"OpenAI. 2023. GPT-4 technical report. arxiv:https:\/\/arXiv.org\/abs\/2303.08774"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612374"},{"key":"e_1_3_3_1_29_2","volume-title":"Proceedings of the 38th International Conference on Machine Learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning transferable visual models from natural language supervision. In Proceedings of the 38th International Conference on Machine Learning."},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","unstructured":"Di Wang Feng Yan Yifeng Wang Lin Zhao Xiao Liang Haodi Zhong and Ronghua Zhang. 2024. Fine-grained semantics-aware representation learning for text-based person retrieval. Proceedings of the 2024 International Conference on Multimedia Retrieval (2024) 92\u2013100. 10.1145\/3652583.3658062","DOI":"10.1145\/3652583.3658062"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","unstructured":"Changji Wen Hongrui Chen Zhenyu Ma Tian Zhang Ce Yang Hengqiang Su and Hongbing Chen. 2022. Pest-YOLO: A model for large-scale multi-class dense and tiny pest detection and counting. Frontiers in Plant Science 13 (2022) 973985. 10.3389\/fpls.2022.973985","DOI":"10.3389\/fpls.2022.973985"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i6.28433"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00899"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02317"},{"key":"e_1_3_3_1_35_2","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Yao Lewei","year":"2022","unstructured":"Lewei Yao, Runhui Huang, Lu Hou, Guansong Lu, Minzhe Niu, Hang Xu, Xiaodan Liang, Zhenguo Li, Xin Jiang, and Chunjing Xu. 2022. FILIP: Fine-grained interactive language-image pre-training. In Proceedings of the International Conference on Learning Representations."},{"key":"e_1_3_3_1_36_2","unstructured":"Hao Yin Xin Man Feiyu Chen Jie Shao and Heng\u00a0Tao Shen. 2025. Cross-modal full-mode fine-grained alignment for text-to-image person retrieval. arxiv:https:\/\/arXiv.org\/abs\/2509.13754"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","unstructured":"Kejian Yu Wentao Ma Shaofan Chen Xiaoqian Wu Yuwei Wang and Tongqing Zhou. 2026. CPCR: Crop pests cross-modal retrieval with prior instruction via multimodal large language model. Computers and Electronics in Agriculture 245 (2026) 111546. 10.1016\/j.compag.2026.111546","DOI":"10.1016\/j.compag.2026.111546"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","unstructured":"Zhiqiang Yuan Wenkai Zhang Kun Fu Xuan Li Chubo Deng Hongqi Wang and Xian Sun. 2021. Exploring a fine-grained multiscale method for cross-modal remote sensing image retrieval. IEEE Transactions on Geoscience and Remote Sensing (2021). 10.1109\/TGRS.2021.3078451","DOI":"10.1109\/TGRS.2021.3078451"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","unstructured":"Ruigeng Zeng Wentao Ma Tongqing Zhou Qinglin Wang Siqi Wang Xinjun Mao and Jie Liu. 2026. Towards Mitigation of False Negatives in Text-to-Image Person Re-identification. IEEE Transactions on Multimedia (2026). 10.1109\/TMM.2026.3664948","DOI":"10.1109\/TMM.2026.3664948"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680591"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","unstructured":"Jingqi Zhang Jiaqi Ji Huiying Ru Yilei Xia and Juan Wen. 2025. A global-local dual-stream collaborative enhancement model for cross-modal image and text retrieval in remote sensing scenarios. AIP Advances 15 8 (2025) 085009. 10.1063\/5.0280560","DOI":"10.1063\/5.0280560"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","unstructured":"Yan Zhang Zhong Ji Changxu Meng Yanwei Pang and Jungong Han. 2026. iEBAKER: Improved remote sensing image-text retrieval framework via eliminate before align and keyword explicit reasoning. Expert Systems with Applications 296 (2026) 128968. 10.1016\/j.eswa.2025.128968","DOI":"10.1016\/j.eswa.2025.128968"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02585"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"crossref","unstructured":"Zhuoyang Zou Xinghui Zhu Qinying Zhu Yi Liu and Lei Zhu. 2024. CREAMY: Cross-modal recipe retrieval by avoiding matching imperfectly. IEEE Access 12 (2024) 33283\u201333295.","DOI":"10.1109\/ACCESS.2024.3370158"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:26:11Z","timestamp":1781537171000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810821"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":43,"alternative-id":["10.1145\/3805622.3810821","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810821","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}