{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T14:34:27Z","timestamp":1773930867663,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681318","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:27Z","timestamp":1729925967000},"page":"2031-2040","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Rethinking the Implicit Optimization Paradigm with Dual Alignments for Referring Remote Sensing Image Segmentation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8665-4886","authenticated-orcid":false,"given":"Yuwen","family":"Pan","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8009-4240","authenticated-orcid":false,"given":"Rui","family":"Sun","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8553-7901","authenticated-orcid":false,"given":"Yuan","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1856-9564","authenticated-orcid":false,"given":"Tianzhu","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China &amp; Peng Cheng Laboratory, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1151-1792","authenticated-orcid":false,"given":"Yongdong","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China &amp; State Key Laboratory of Communication Content Cognition, People's Daily Online, Hefei, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11036-020-01703-3"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2016.01.028"},{"key":"e_1_3_2_1_3_1","first-page":"234","article-title":"A survey on threshold based segmentation technique in image processing","volume":"3","author":"Bhargavi K","year":"2014","unstructured":"K Bhargavi and S Jyothi. 2014. A survey on threshold based segmentation technique in image processing. International Journal of Innovative Research and Development, Vol. 3, 12 (2014), 234--239.","journal-title":"International Journal of Innovative Research and Development"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"e_1_3_2_1_5_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01601"},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings, Part V 14","author":"Duan Liuyun","year":"2016","unstructured":"Liuyun Duan and Florent Lafarge. 2016. Towards large-scale city reconstruction from satellites. In Computer Vision--ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11--14, 2016, Proceedings, Part V 14. Springer, 89--104."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1080\/0143116031000103853"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.compeleceng.2022.108223"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_11_1","volume-title":"Long short-term memory. Neural computation","author":"Hochreiter Sepp","year":"1997","unstructured":"Sepp Hochreiter and J\u00fcrgen Schmidhuber. 1997. Long short-term memory. Neural computation, Vol. 9, 8 (1997), 1735--1780."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00448"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01050"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19984"},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings, Part X 16","author":"Hui Tianrui","year":"2020","unstructured":"Tianrui Hui, Si Liu, Shaofei Huang, Guanbin Li, Sansi Yu, Faxi Zhang, and Jizhong Han. 2020. Linguistic structure guided context modeling for referring image segmentation. In Computer Vision--ECCV 2020: 16th European Conference, Glasgow, UK, August 23--28, 2020, Proceedings, Part X 16. Springer, 59--75."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00973"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00180"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.isprsjprs.2021.01.020"},{"key":"e_1_3_2_1_19_1","volume-title":"Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. 2012. Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems, Vol. 25 (2012)."},{"key":"e_1_3_2_1_20_1","volume-title":"Deep learning. nature","author":"LeCun Yann","year":"2015","unstructured":"Yann LeCun, Yoshua Bengio, and Geoffrey Hinton. 2015. Deep learning. nature, Vol. 521, 7553 (2015), 436--444."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00602"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.143"},{"key":"e_1_3_2_1_23_1","first-page":"4761","article-title":"Cross-modal progressive comprehension for referring segmentation","volume":"44","author":"Liu Si","year":"2021","unstructured":"Si Liu, Tianrui Hui, Shaofei Huang, Yunchao Wei, Bo Li, and Guanbin Li. 2021. Cross-modal progressive comprehension for referring segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 44, 9 (2021), 4761--4775.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_24_1","volume-title":"Rotated Multi-Scale Interaction Network for Referring Remote Sensing Image Segmentation. arXiv preprint arXiv:2312.12470","author":"Liu Sihan","year":"2023","unstructured":"Sihan Liu, Yiwei Ma, Xiaoqing Zhang, Haowei Wang, Jiayi Ji, Xiaoshuai Sun, and Rongrong Ji. 2023. Rotated Multi-Scale Interaction Network for Referring Remote Sensing Image Segmentation. arXiv preprint arXiv:2312.12470 (2023)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CISP.2013.6745242"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1080\/01431160050029567"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01963"},{"key":"e_1_3_2_1_30_1","volume-title":"Exploring Reliable Matching with Phase Enhancement for Night-time Semantic Segmentation. In European Conference on Computer Vision. Springer.","author":"Pan Yuwen","year":"2024","unstructured":"Yuwen Pan, Rui Sun, Naisong Luo, Tianzhu Zhang, and Yongdong Zhang. 2024. Exploring Reliable Matching with Phase Enhancement for Night-time Semantic Segmentation. In European Conference on Computer Vision. Springer."},{"key":"e_1_3_2_1_31_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485128"},{"key":"e_1_3_2_1_33_1","volume-title":"U-net: Convolutional networks for biomedical image segmentation. In Medical Image Computing and Computer-Assisted Intervention--MICCAI 2015: 18th International Conference","author":"Ronneberger Olaf","year":"2015","unstructured":"Olaf Ronneberger, Philipp Fischer, and Thomas Brox. 2015. U-net: Convolutional networks for biomedical image segmentation. In Medical Image Computing and Computer-Assisted Intervention--MICCAI 2015: 18th International Conference, Munich, Germany, October 5--9, 2015, Proceedings, Part III 18. Springer, 234--241."},{"key":"e_1_3_2_1_34_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.isprsjprs.2017.06.003"},{"key":"e_1_3_2_1_36_1","volume-title":"DAW: Exploring the Better Weighting Function for Semi-supervised Semantic Segmentation. In Advances in Neural Information Processing Systems.","author":"Sun Rui","year":"2023","unstructured":"Rui Sun, Huayu Mai, Tianzhu Zhang, and Feng Wu. 2023. DAW: Exploring the Better Weighting Function for Semi-supervised Semantic Segmentation. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/PRRS.2018.8486170"},{"key":"e_1_3_2_1_38_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_39_1","volume-title":"DSViT: Dynamically scalable vision transformer for remote sensing image segmentation and classification","author":"Wang Falin","year":"2023","unstructured":"Falin Wang, Jian Ji, and Yuan Wang. 2023. DSViT: Dynamically scalable vision transformer for remote sensing image segmentation and classification. IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing (2023)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19818-2_3"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.envsoft.2010.03.019"},{"key":"e_1_3_2_1_42_1","unstructured":"Yuxin Wu Alexander Kirillov Francisco Massa Wan-Yen Lo and Ross Girshick. 2019. Detectron2. https:\/\/github.com\/facebookresearch\/detectron2."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.3390\/rs13183585"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01762"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.2971171"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01075"},{"key":"e_1_3_2_1_47_1","volume-title":"RRSIS: Referring remote sensing image segmentation","author":"Yuan Zhenghang","year":"2024","unstructured":"Zhenghang Yuan, Lichao Mou, Yuansheng Hua, and Xiao Xiang Zhu. 2024. RRSIS: Referring remote sensing image segmentation. IEEE Transactions on Geoscience and Remote Sensing (2024)."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681318","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681318","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:43Z","timestamp":1750295863000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681318"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":47,"alternative-id":["10.1145\/3664647.3681318","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681318","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}