{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T16:25:22Z","timestamp":1773246322047,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,12]],"date-time":"2023-06-12T00:00:00Z","timestamp":1686528000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Zhejiang Provincial Natural Science Foundation of China","award":["LR21F020002"],"award-info":[{"award-number":["LR21F020002"]}]},{"name":"Natural Science Foundation of China","award":["61976192,62102365"],"award-info":[{"award-number":["61976192,62102365"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,12]]},"DOI":"10.1145\/3591106.3592236","type":"proceedings-article","created":{"date-parts":[[2023,6,8]],"date-time":"2023-06-08T22:33:38Z","timestamp":1686263618000},"page":"398-406","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":43,"title":["Reducing Semantic Confusion: Scene-aware Aggregation Network for Remote Sensing Cross-modal Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5968-5209","authenticated-orcid":false,"given":"Jiancheng","family":"Pan","sequence":"first","affiliation":[{"name":"Zhejiang University of Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1749-9509","authenticated-orcid":false,"given":"Qing","family":"Ma","sequence":"additional","affiliation":[{"name":"Zhejiang University of Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6177-3862","authenticated-orcid":false,"given":"Cong","family":"Bai","sequence":"additional","affiliation":[{"name":"Zhejiang University of Technology, China"}]}],"member":"320","published-online":{"date-parts":[[2023,6,12]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.3390\/rs12030405"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2975718"},{"key":"e_1_3_2_1_3_1","volume-title":"Intra-Modal Constraint Loss for Image-Text Retrieval. In 2022 IEEE International Conference on Image Processing (ICIP). IEEE, 4023\u20134027","author":"Chen Jianan","year":"2022","unstructured":"Jianan Chen, Lu Zhang, Qiong Wang, Cong Bai, and Kidiyo Kpalma. 2022. Intra-Modal Constraint Loss for Image-Text Retrieval. In 2022 IEEE International Conference on Image Processing (ICIP). IEEE, 4023\u20134027."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3127851"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2021.3105551"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2021.3070872"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2016.2598228"},{"key":"e_1_3_2_1_8_1","volume-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555","author":"Chung Junyoung","year":"2014","unstructured":"Junyoung Chung, Caglar Gulcehre, KyungHyun Cho, and Yoshua Bengio. 2014. Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555 (2014)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00296"},{"key":"e_1_3_2_1_11_1","volume-title":"Improving visual-semantic embeddings with hard negatives. arXiv preprint arXiv:1707.05612","author":"Faghri Fartash","year":"2017","unstructured":"Fartash Faghri, David\u00a0J Fleet, Jamie\u00a0Ryan Kiros, and Sanja Fidler. 2017. Vse++: Improving visual-semantic embeddings with hard negatives. arXiv preprint arXiv:1707.05612 (2017)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"e_1_3_2_1_13_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma P","year":"2014","unstructured":"Diederik\u00a0P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_13"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2017.2776321"},{"key":"e_1_3_2_1_16_1","first-page":"1","article-title":"Fusion-based correlation learning model for cross-modal remote sensing image retrieval","volume":"19","author":"Lv Yafei","year":"2021","unstructured":"Yafei Lv, Wei Xiong, Xiaohan Zhang, and Yaqi Cui. 2021. Fusion-based correlation learning model for cross-modal remote sensing image retrieval. IEEE Geoscience and Remote Sensing Letters 19 (2021), 1\u20135.","journal-title":"IEEE Geoscience and Remote Sensing Letters"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/PRRS.2018.8486338"},{"key":"e_1_3_2_1_18_1","unstructured":"Li Mi Siran Li Christel Chappuis and Devis Tuia. 2022. Knowledge-Aware Cross-Modal Text-Image Retrieval for Remote Sensing Images. (2022)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2016.07.001"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2015.7301382"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413961"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462829"},{"key":"e_1_3_2_1_24_1","volume-title":"Where Does the Performance Improvement Come From?-A Reproducibility Concern about Image-Text Retrieval. arXiv preprint arXiv:2203.03853","author":"Rao Jun","year":"2022","unstructured":"Jun Rao, Fei Wang, Liang Ding, Shuhan Qi, Yibing Zhan, Weifeng Liu, and Dacheng Tao. 2022. Where Does the Performance Improvement Come From?-A Reproducibility Concern about Image-Text Retrieval. arXiv preprint arXiv:2203.03853 (2022)."},{"key":"e_1_3_2_1_25_1","volume-title":"Visualizing data using t-SNE.Journal of machine learning research 9, 11","author":"Maaten Laurens Van\u00a0der","year":"2008","unstructured":"Laurens Van\u00a0der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE.Journal of machine learning research 9, 11 (2008)."},{"key":"e_1_3_2_1_26_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2017.2685945"},{"key":"e_1_3_2_1_28_1","volume-title":"Deep feature aggregation framework driven by graph convolutional network for scene classification in remote sensing","author":"Xu Kejie","year":"2021","unstructured":"Kejie Xu, Hong Huang, Peifang Deng, and Yuan Li. 2021. Deep feature aggregation framework driven by graph convolutional network for scene classification in remote sensing. IEEE Transactions on Neural Networks and Learning Systems (2021)."},{"key":"e_1_3_2_1_29_1","volume-title":"Exploring a fine-grained multiscale method for cross-modal remote sensing image retrieval. arXiv preprint arXiv:2204.09868","author":"Yuan Zhiqiang","year":"2022","unstructured":"Zhiqiang Yuan, Wenkai Zhang, Kun Fu, Xuan Li, Chubo Deng, Hongqi Wang, and Xian Sun. 2022. Exploring a fine-grained multiscale method for cross-modal remote sensing image retrieval. arXiv preprint arXiv:2204.09868 (2022)."},{"key":"e_1_3_2_1_30_1","first-page":"1","article-title":"A lightweight multi-scale crossmodal text-image retrieval method in remote sensing","volume":"60","author":"Yuan Zhiqiang","year":"2021","unstructured":"Zhiqiang Yuan, Wenkai Zhang, Xuee Rong, Xuan Li, Jialiang Chen, Hongqi Wang, Kun Fu, and Xian Sun. 2021. A lightweight multi-scale crossmodal text-image retrieval method in remote sensing. IEEE Transactions on Geoscience and Remote Sensing 60 (2021), 1\u201319.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jag.2022.103071"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3163706"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS46834.2022.9883242"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.3390\/rs11050494"}],"event":{"name":"ICMR '23: International Conference on Multimedia Retrieval","location":"Thessaloniki Greece","acronym":"ICMR '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2023 ACM International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592236","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3591106.3592236","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:51:22Z","timestamp":1750182682000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592236"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,12]]},"references-count":34,"alternative-id":["10.1145\/3591106.3592236","10.1145\/3591106"],"URL":"https:\/\/doi.org\/10.1145\/3591106.3592236","relation":{},"subject":[],"published":{"date-parts":[[2023,6,12]]},"assertion":[{"value":"2023-06-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}