{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,28]],"date-time":"2026-05-28T02:05:29Z","timestamp":1779933929757,"version":"3.53.1"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031789793","type":"print"},{"value":"9783031789809","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-78980-9_15","type":"book-chapter","created":{"date-parts":[[2025,1,27]],"date-time":"2025-01-27T10:26:33Z","timestamp":1737973593000},"page":"231-245","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Towards a\u00a0Multimodal Framework for\u00a0Remote Sensing Image Change Retrieval and\u00a0Captioning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7932-8249","authenticated-orcid":false,"given":"Roger","family":"Ferrod","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7570-637X","authenticated-orcid":false,"given":"Luigi","family":"Di Caro","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8736-3132","authenticated-orcid":false,"given":"Dino","family":"Ienco","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,1,28]]},"reference":[{"key":"15_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-023-00772-x","volume":"10","author":"AA Adegun","year":"2023","unstructured":"Adegun, A.A., Viriri, S., Tapamo, J.R.: Review of deep learning methods for remote sensing satellite images classification: experimental survey and comparative analysis. J. Big Data 10, 1\u201324 (2023)","journal-title":"J. Big Data"},{"key":"15_CR2","doi-asserted-by":"crossref","unstructured":"Chang, S., Ghamisi, P.: Changes to captions: an attentive network for remote sensing change captioning. IEEE Trans. Image Process 32, 6047\u20136060 (2023)","DOI":"10.1109\/TIP.2023.3328224"},{"key":"15_CR3","unstructured":"Chen, T.S., Hung, W.C., Tseng, H.Y., Chien, S.Y., Yang, M.H.: Incremental false negative detection for contrastive learning. In: International Conference on Learning Representations (2022)"},{"key":"15_CR4","doi-asserted-by":"crossref","unstructured":"Chouaf, S., Hoxha, G., Smara, Y., Melgani, F.: Captioning changes in bi-temporal remote sensing images. In: 2021 IEEE International Geoscience and Remote Sensing Symposium IGARSS, pp. 2891\u20132894 (2021)","DOI":"10.1109\/IGARSS47720.2021.9554419"},{"key":"15_CR5","doi-asserted-by":"crossref","unstructured":"Guo, Z., Wang, T., Laaksonen, J.T.: CLIP4IDC: clip for image difference captioning. ArXiv abs\/2206.00629 (2022)","DOI":"10.18653\/v1\/2022.aacl-short.5"},{"key":"15_CR6","first-page":"1","volume":"60","author":"G Hoxha","year":"2022","unstructured":"Hoxha, G., Chouaf, S., Melgani, F., Smara, Y.: Change captioning: a new paradigm for multitemporal remote sensing image analysis. IEEE Trans. Geosci. Remote Sens. 60, 1\u201314 (2022)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"15_CR7","unstructured":"Hu, Y., Yuan, J., Wen, C., Lu, X., Li, X.: RSGPT: a remote sensing vision language model and benchmark (2023)"},{"key":"15_CR8","doi-asserted-by":"crossref","unstructured":"Huynh, T., Kornblith, S., Walter, M.R., Maire, M., Khademi, M.: Boosting contrastive self-supervised learning with false negative cancellation. In: 2022 IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 986\u2013996 (2022)","DOI":"10.1109\/WACV51458.2022.00106"},{"key":"15_CR9","unstructured":"Jia, C., et al.: Scaling up visual and vision-language representation learning with noisy text supervision. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a0139, pp. 4904\u20134916. PMLR (18\u201324 Jul 2021) (2021)"},{"issue":"3","key":"15_CR10","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1109\/TBDATA.2019.2921572","volume":"7","author":"J Johnson","year":"2019","unstructured":"Johnson, J., Douze, M., J\u00e9gou, H.: Billion-scale similarity search with GPUs. IEEE Trans. Big Data 7(3), 535\u2013547 (2019)","journal-title":"IEEE Trans. Big Data"},{"key":"15_CR11","unstructured":"Li, J., Selvaraju, R.R., Gotmare, A.D., Joty, S.R., Xiong, C., Hoi, S.C.H.: Align before fuse: vision and language representation learning with momentum distillation. In: Neural Information Processing Systems (2021)"},{"key":"15_CR12","unstructured":"Lin, C.Y.: ROUGE: a package for automatic evaluation of summaries. In: Text Summarization Branches Out, pp. 74\u201381. Association for Computational Linguistics, Barcelona, Spain (2004)"},{"key":"15_CR13","first-page":"1","volume":"60","author":"C Liu","year":"2022","unstructured":"Liu, C., Zhao, R., Chen, H., Zou, Z., Shi, Z.: Remote sensing image change captioning with dual-branch transformers: a new method and a large scale dataset. IEEE Trans. Geosci. Remote Sens. 60, 1\u201320 (2022)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"15_CR14","doi-asserted-by":"crossref","unstructured":"Liu, F., et al.: RemoteCLIP: a vision language foundation model for remote sensing (2024)","DOI":"10.1109\/TGRS.2024.3390838"},{"key":"15_CR15","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T. and Zhu, W.J.: BLEU: a method for automatic evaluation of machine translation, pp. 311\u2013318 (2002)","DOI":"10.3115\/1073083.1073135"},{"issue":"1","key":"15_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1080\/10095020.2022.2128902","volume":"27","author":"Q Zhu","year":"2024","unstructured":"Zhu, Q., Guo, X., Li, Z.L., Li, D.: A review of multi-class change detection for satellite remote sensing imagery. Geo-spatial Inf. Sci. 27(1), 1\u201315 (2024)","journal-title":"Geo-spatial Inf. Sci."},{"key":"15_CR17","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning (2021)"},{"key":"15_CR18","doi-asserted-by":"publisher","first-page":"9115","DOI":"10.1109\/JSTARS.2022.3215803","volume":"15","author":"MMA Rahhal","year":"2022","unstructured":"Rahhal, M.M.A., Bazi, Y., Alsharif, N.A., Bashmal, L., Alajlan, N.A., Melgani, F.: Multilanguage transformer for improved text to remote sensing image retrieval. IEEE J. Sel. Top. Appl. Earth Observ. Remote Sens. 15, 9115\u20139126 (2022)","journal-title":"IEEE J. Sel. Top. Appl. Earth Observ. Remote Sens."},{"key":"15_CR19","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Sentence-BERT: sentence embeddings using Siamese BERT-networks. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics (2019)","DOI":"10.18653\/v1\/D19-1410"},{"key":"15_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TGRS.2023.3335418","volume":"61","author":"X Sun","year":"2023","unstructured":"Sun, X., et al.: RingMo: a remote sensing foundation model with masked image modeling. IEEE Trans. Geosci. Remote Sens. 61, 1\u201322 (2023)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"15_CR21","doi-asserted-by":"publisher","first-page":"2620","DOI":"10.1109\/TIP.2023.3268004","volume":"32","author":"Y Tu","year":"2023","unstructured":"Tu, Y., Li, L., Su, L., Du, J., Lu, K., Huang, Q.: Viewpoint-adaptive representation disentanglement network for change captioning. IEEE Trans. Image Process. 32, 2620\u20132635 (2023)","journal-title":"IEEE Trans. Image Process."},{"key":"15_CR22","doi-asserted-by":"publisher","first-page":"9518","DOI":"10.1109\/TMM.2023.3254162","volume":"25","author":"Y Tu","year":"2023","unstructured":"Tu, Y., Li, L., Su, L., Lu, K., Huang, Q.: Neighborhood contrastive transformer for change captioning. IEEE Trans. Multimedia 25, 9518\u20139529 (2023)","journal-title":"IEEE Trans. Multimedia"},{"key":"15_CR23","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Zitnick, C.L., Parikh, D.: Cider: Consensus-based image description evaluation. In: 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4566\u20134575 (2015)","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"15_CR24","first-page":"1","volume":"61","author":"D Wang","year":"2022","unstructured":"Wang, D., et al.: Advancing plain vision transformer toward remote sensing foundation model. IEEE Trans. Geosci. Remote Sens. 61, 1\u201315 (2022)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"15_CR25","doi-asserted-by":"crossref","unstructured":"Wang, F., Liu, H.: Understanding the behaviour of contrastive loss. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2495\u20132504 (2020)","DOI":"10.1109\/CVPR46437.2021.00252"},{"key":"15_CR26","doi-asserted-by":"crossref","unstructured":"Xu, L., Xie, H., Wang, F.L., Tao, X., Wang, W., Li, Q.: Contrastive sentence representation learning with adaptive false negative cancellation. Inf. Fusion 102(C), 102065 (2024)","DOI":"10.1016\/j.inffus.2023.102065"},{"key":"15_CR27","doi-asserted-by":"crossref","unstructured":"Yao, L., Wang, W., Jin, Q.: Image difference captioning with pre-training and contrastive learning. In: AAAI Conference on Artificial Intelligence (2022)","DOI":"10.1609\/aaai.v36i3.20218"},{"key":"15_CR28","doi-asserted-by":"crossref","unstructured":"Yu, J., Wang, Z., Vasudevan, V., Yeung, L., Seyedhosseini, M., Wu, Y.: CoCa: contrastive captioners are image-text foundation models. Trans. Mach. Learn. Res. 36(3), 3108\u20133116 (2022)","DOI":"10.1609\/aaai.v36i3.20218"},{"key":"15_CR29","doi-asserted-by":"crossref","unstructured":"Zhang, J., Zhou, Z., Mai, G., Mu, L., Hu, M., Li, S.: Text2Seg: remote sensing image semantic segmentation via text-guided visual foundation models. ArXiv abs\/2304.10597 (2023)","DOI":"10.1145\/3687123.3698287"},{"key":"15_CR30","doi-asserted-by":"crossref","unstructured":"Zhang, W., Cai, M., Zhang, T., Zhuang, Y., Mao, X.: EarthGPT: a universal multi-modal large language model for multi-sensor image comprehension in remote sensing domain (2024)","DOI":"10.1109\/TGRS.2024.3409624"},{"key":"15_CR31","doi-asserted-by":"publisher","first-page":"1447","DOI":"10.1109\/JSTARS.2023.3236662","volume":"16","author":"W Zhou","year":"2023","unstructured":"Zhou, W., Guan, H., Li, Z., Shao, Z., Delavar, M.R.: Remote sensing image retrieval in the past decade: achievements, challenges, and future directions. IEEE J. Sel. Top. Appl. Earth Observ. Remote Sens. 16, 1447\u20131473 (2023)","journal-title":"IEEE J. Sel. Top. Appl. Earth Observ. Remote Sens."}],"container-title":["Lecture Notes in Computer Science","Discovery Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-78980-9_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,27]],"date-time":"2025-01-27T10:26:56Z","timestamp":1737973616000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-78980-9_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031789793","9783031789809"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-78980-9_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"28 January 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Discovery Science","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pisa","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dis2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/ds2024.isti.cnr.it\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}