{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:57:13Z","timestamp":1781539033183,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810861","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"79-87","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["DIR-RSCLIP: A Degradation-Invariant Cross-Modal Retrieval Network for Remote Sensing Images"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-7220-2810","authenticated-orcid":false,"given":"Xiaotian","family":"Li","sequence":"first","affiliation":[{"name":"College Of information Science and Technology, Gansu Agricultural University, Lanzhou, Gansu, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5629-5434","authenticated-orcid":false,"given":"Chenyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"College of Horticulture, Gansu Agricultural University, Lanzhou, Gansu, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9647-5544","authenticated-orcid":false,"given":"Changbai","family":"Chen","sequence":"additional","affiliation":[{"name":"College Of information Science and Technology, Gansu Agricultural University, Lanzhou, Gansu, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0152-703X","authenticated-orcid":false,"given":"Zhongjie","family":"Zhu","sequence":"additional","affiliation":[{"name":"College Of information Science and Technology, Gansu Agricultural University, Lanzhou, Gansu, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20071-7_2"},{"key":"e_1_3_3_1_3_2","first-page":"1597","volume-title":"International Conference on Machine Learning","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey Hinton. 2020. A Simple Framework for Contrastive Learning of Visual Representations. In International Conference on Machine Learning. PMLR, 1597\u20131607."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00391"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Gong Cheng Xingxing Xie Junwei Han Lei Guo and Gui-Song Xia. 2020. Remote Sensing Image Scene Classification Meets Deep Learning: Challenges Methods Benchmarks and Opportunities. IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing 13 (2020) 3735\u20133756.","DOI":"10.1109\/JSTARS.2020.3005403"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00359"},{"key":"e_1_3_3_1_7_2","first-page":"4171","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics. 4171\u20134186."},{"key":"e_1_3_3_1_8_2","volume-title":"International Conference on Learning Representations","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, et\u00a0al. 2021. An Image Is Worth 16x16 Words: Transformers for Image Recognition at Scale. In International Conference on Learning Representations."},{"key":"e_1_3_3_1_9_2","unstructured":"Yaroslav Ganin Evgeniya Ustinova Hana Ajakan Pascal Germain Hugo Larochelle Fran\u00e7ois Laviolette Mario Marchand and Victor Lempitsky. 2016. Domain-Adversarial Training of Neural Networks. Journal of Machine Learning Research 17 1 (2016) 2096\u20132130."},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"e_1_3_3_1_11_2","volume-title":"International Conference on Learning Representations","author":"Hendrycks Dan","year":"2019","unstructured":"Dan Hendrycks and Thomas Dietterich. 2019. Benchmarking Neural Network Robustness to Common Corruptions and Perturbations. In International Conference on Learning Representations."},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00969"},{"key":"e_1_3_3_1_13_2","first-page":"4904","volume-title":"International Conference on Machine Learning","author":"Jia Chao","year":"2021","unstructured":"Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc\u00a0V Le, Yunhsuan Sung, Zhen Li, and Tom Duerig. 2021. Scaling Up Visual and Vision-Language Representation Learning with Noisy Text Supervision. In International Conference on Machine Learning. PMLR, 4904\u20134916."},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"crossref","unstructured":"Yifan Jiang Xinyu Gong Ding Liu Yu Cheng Chen Fang Xiaohui Shen Jianchao Yang Pan Zhou and Zhangyang Wang. 2021. EnlightenGAN: Deep Light Enhancement Without Paired Supervision. IEEE Transactions on Image Processing 30 (2021) 2340\u20132349.","DOI":"10.1109\/TIP.2021.3051462"},{"key":"e_1_3_3_1_15_2","first-page":"18661","volume-title":"Advances in Neural Information Processing Systems","author":"Khosla Prannay","year":"2020","unstructured":"Prannay Khosla, Piotr Teterwak, Chen Wang, Aaron Sarna, Yonglong Tian, Phillip Isola, Aaron Maschinot, Ce Liu, and Dilip Krishnan. 2020. Supervised Contrastive Learning. In Advances in Neural Information Processing Systems. 18661\u201318673."},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01834"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01693"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.511"},{"key":"e_1_3_3_1_19_2","first-page":"12888","volume-title":"International Conference on Machine Learning","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. In International Conference on Machine Learning. PMLR, 12888\u201312900."},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00060"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"crossref","unstructured":"Fan Liu Delong Chen Zhangqingyun Guan Xiaocong Zhou Jiale Zhu Qiaolin Ye Liyong Fu and Jun Zhou. 2024. RemoteCLIP: A Vision Language Foundation Model for Remote Sensing. IEEE Transactions on Geoscience and Remote Sensing 62 (2024) 1\u201316.","DOI":"10.1109\/TGRS.2024.3390838"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i2.20072"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"crossref","unstructured":"Xiaoqiang Lu Binqiang Wang Xiangtao Zheng and Xuelong Li. 2018. Exploring Models and Data for Remote Sensing Image Caption Generation. IEEE Transactions on Geoscience and Remote Sensing 56 4 (2018) 2183\u20132195.","DOI":"10.1109\/TGRS.2017.2776321"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00555"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612374"},{"key":"e_1_3_3_1_27_2","volume-title":"Advances in Neural Information Processing Systems","author":"Potlapalli Vaishnav","year":"2023","unstructured":"Vaishnav Potlapalli, Syed\u00a0Waqas Zamir, Salman Khan, and Fahad\u00a0Shahbaz Khan. 2023. PromptIR: Prompting for All-in-One Blind Image Restoration. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6865"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462829"},{"key":"e_1_3_3_1_30_2","first-page":"8748","volume-title":"International Conference on Machine Learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning Transferable Visual Models from Natural Language Supervision. In International Conference on Machine Learning. PMLR, 8748\u20138763."},{"key":"e_1_3_3_1_31_2","volume-title":"International Conference on Learning Representations","author":"Robinson Joshua","year":"2021","unstructured":"Joshua Robinson, Ching-Yao Chuang, Suvrit Sra, and Stefanie Jegelka. 2021. Contrastive Learning with Hard Negative Samples. In International Conference on Learning Representations."},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3678717.3691318"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19800-7_9"},{"key":"e_1_3_3_1_34_2","unstructured":"Aaron van\u00a0den Oord Yazhe Li and Oriol Vinyals. 2018. Representation Learning with Contrastive Predictive Coding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1807.03748 (2018)."},{"key":"e_1_3_3_1_35_2","first-page":"5998","volume-title":"Advances in Neural Information Processing Systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention Is All You Need. In Advances in Neural Information Processing Systems. 5998\u20136008."},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"crossref","unstructured":"Yaxiong Wang Xueming Li Zongxu Pan Yibo Yang Fei Ma and Ning Xu. 2023. A Fine-Grained Semantic Alignment Method Specific to Aggregate Multi-Scale Information for Cross-Modal Remote Sensing Image Retrieval. Sensors 23 20 (2023) 8437.","DOI":"10.3390\/s23208437"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01716"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i6.28393"},{"key":"e_1_3_3_1_39_2","unstructured":"Yuting Yuan Yang Zhan and Zhixi Xiong. 2023. Parameter-Efficient Transfer Learning for Remote Sensing Image-Text Retrieval. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.12509 (2023)."},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"crossref","unstructured":"Zhiqiang Yuan Wenkai Zhang Kun Fu Xuan Tian and Qi Zheng. 2022. Exploring a Fine-Grained Multiscale Method for Cross-Modal Remote Sensing Image Retrieval. IEEE Transactions on Geoscience and Remote Sensing 60 (2022) 1\u201319.","DOI":"10.1109\/TGRS.2021.3078451"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"crossref","unstructured":"Zhiqiang Yuan Wenkai Zhang Xuan Tian Xuecheng Pu Dongliang Wang and Kun Fu. 2022. Remote Sensing Cross-Modal Text-Image Retrieval Based on Global and Local Information. IEEE Transactions on Geoscience and Remote Sensing 60 (2022) 1\u201316.","DOI":"10.1109\/TGRS.2022.3163706"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00564"},{"key":"e_1_3_3_1_43_2","volume-title":"International Conference on Learning Representations","author":"Zhang Hongyi","year":"2018","unstructured":"Hongyi Zhang, Moustapha Cisse, Yann\u00a0N Dauphin, and David Lopez-Paz. 2018. mixup: Beyond Empirical Risk Minimization. In International Conference on Learning Representations."},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"crossref","unstructured":"Wenkai Zhang Kun Fu Xuan Li and Zhiqiang Yuan. 2024. Hypersphere-Based Remote Sensing Cross-Modal Text-Image Retrieval via Curriculum Learning. IEEE Transactions on Geoscience and Remote Sensing 62 (2024) 1\u201316.","DOI":"10.1109\/TGRS.2024.3404605"},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"crossref","unstructured":"Xiao Zhang Wenxuan Li Xiao Wang Lei Wang Feng Tian and Wenpeng Lu. 2023. A Fusion Encoder with Multi-Task Guidance for Cross-Modal Text-Image Retrieval in Remote Sensing. Remote Sensing 15 18 (2023) 4637.","DOI":"10.3390\/rs15184637"},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"crossref","unstructured":"Zilun Zhang Tiancheng Zhao Yulong Guo and Jianwei Yin. 2024. RS5M and GeoRSCLIP: A Large-Scale Vision-Language Dataset and a Large Vision-Language Model for Remote Sensing. IEEE Transactions on Geoscience and Remote Sensing 62 (2024) 1\u201316.","DOI":"10.1109\/TGRS.2024.3449154"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:37:09Z","timestamp":1781537829000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810861"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":45,"alternative-id":["10.1145\/3805622.3810861","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810861","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}