{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,25]],"date-time":"2025-08-25T12:10:06Z","timestamp":1756123806742,"version":"3.44.0"},"reference-count":48,"publisher":"Informa UK Limited","issue":"1","license":[{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["International Journal of Digital Earth"],"published-print":{"date-parts":[[2025,8,25]]},"DOI":"10.1080\/17538947.2025.2526102","type":"journal-article","created":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T11:03:17Z","timestamp":1751367797000},"update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":0,"title":["Contrastive learning based remote sensing text-to-image generation for few-shot remote sensing image captioning"],"prefix":"10.1080","volume":"18","author":[{"given":"Haonan","family":"Zhou","sequence":"first","affiliation":[{"name":"Beijing Institute of Remote Sensing Information","place":["Beijing, People\u2019s Republic of China"]}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hang","family":"Tang","sequence":"additional","affiliation":[{"name":"Beijing Institute of Remote Sensing Information","place":["Beijing, People\u2019s Republic of China"]}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiangchun","family":"Liu","sequence":"additional","affiliation":[{"name":"Beijing Institute of Remote Sensing Information","place":["Beijing, People\u2019s Republic of China"]}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoxiao","family":"Shi","sequence":"additional","affiliation":[{"name":"Beijing Institute of Remote Sensing Information","place":["Beijing, People\u2019s Republic of China"]}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lurui","family":"Xia","sequence":"additional","affiliation":[{"name":"Space Engineering University","place":["Beijing, People\u2019s Republic of China"]}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"301","published-online":{"date-parts":[[2025,7]]},"reference":[{"key":"e_1_3_2_2_1","doi-asserted-by":"crossref","unstructured":"Anderson Peter Basura Fernando Mark Johnson and Stephen Gould. 2016. \u201cSPICE: Semantic Propositional Image Caption Evaluation.\u201d Paper presented at the 2016 European Conference on Computer Vision (ECCV).","DOI":"10.1007\/978-3-319-46454-1_24"},{"key":"e_1_3_2_3_1","unstructured":"Banerjee Satanjeev and Alon Lavie. 2005. \u201cMETEOR: An Automatic Metric for MT Evaluation with Improved Correlation with Human Judgments.\u201d Paper presented at the IEEvaluation@ACL."},{"key":"e_1_3_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2020.2983851"},{"key":"e_1_3_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2019.2895693"},{"key":"e_1_3_2_6_1","unstructured":"Belousov Sergei. 2021. MobileStyleGAN: A lightweight convolutional neural network for high-fidelity image synthesis.\u201d arXiv preprint arXiv:2104.04767."},{"key":"e_1_3_2_7_1","unstructured":"Ding Ming Zhuoyi Yang Wenyi Hong Wendi Zheng Chang Zhou Da Yin Junyang Lin et al. 2021. \u201cCogView: Mastering Text-to-Image Generation via Transformers.\u201d Paper presented at the Neural Information Processing Systems."},{"key":"e_1_3_2_8_1","unstructured":"Ding Ming Wendi Zheng Wenyi Hong and Jie Tang. 2022. CogView2: Faster and Better Text-to-Image Generation via Hierarchical Transformers.\u201d ArXiv abs\/2204.14217."},{"key":"e_1_3_2_9_1","unstructured":"Heusel Martin Hubert Ramsauer Thomas Unterthiner Bernhard Nessler and Sepp Hochreiter. 2017. \u201cGANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium.\u201d Paper presented at the Neural Information Processing Systems."},{"key":"e_1_3_2_10_1","unstructured":"Ho Jonathan Ajay Jain and P. Abbeel. 2020. \u201cDenoising Diffusion Probabilistic Models.\u201d ArXiv abs\/2006.11239."},{"key":"e_1_3_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3195692"},{"key":"e_1_3_2_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.111028"},{"key":"e_1_3_2_13_1","doi-asserted-by":"crossref","unstructured":"Kang Minguk Jun-Yan Zhu Richard Zhang Jaesik Park Eli Shechtman Sylvain Paris and Taesung Park. 2023. \u201cScaling up GANs for Text-to-Image Synthesis.\u201d 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR):10124\u201310134.","DOI":"10.1109\/CVPR52729.2023.00976"},{"key":"e_1_3_2_14_1","unstructured":"Lin Chin-Yew. 2004. \u201cROUGE: A Package for Automatic Evaluation of Summaries.\u201d Paper presented at the Annual Meeting of the Association for Computational Linguistics."},{"key":"e_1_3_2_15_1","doi-asserted-by":"crossref","unstructured":"Lin Tsung-Yi Michael Maire Serge Belongie James Hays Pietro Perona Deva Ramanan Piotr Doll\u00e1r and C Lawrence Zitnick. 2014. \u201cMicrosoft coco: Common objects in context.\u201d 2014 European Conference on Computer Vision (ECCV):740\u2013755.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-02196-3"},{"key":"e_1_3_2_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.111077"},{"key":"e_1_3_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2017.2776321"},{"key":"e_1_3_2_19_1","doi-asserted-by":"crossref","unstructured":"Meng Lingwu Jing Wang Ran Meng Yang Yang and Liang Xiao. 2024. \u201cA multiscale grouping transformer with clip latents for remote sensing image captioning.\u201d IEEE Transactions on Geoscience and Remote Sensing.","DOI":"10.1109\/TGRS.2024.3385500"},{"key":"e_1_3_2_20_1","unstructured":"Mirza Mehdi and Simon Osindero. 2014. Conditional Generative Adversarial Nets.\u201d ArXiv abs\/1411.1784."},{"key":"e_1_3_2_21_1","doi-asserted-by":"publisher","DOI":"10.1080\/17538947.2024.2392847"},{"key":"e_1_3_2_22_1","unstructured":"Oord A\u00e4ron van den Nal Kalchbrenner Lasse Espeholt Koray Kavukcuoglu Oriol Vinyals and Alex Graves. 2016. \u201cConditional Image Generation with PixelCNN Decoders.\u201d ArXiv abs\/1606.05328."},{"key":"e_1_3_2_23_1","doi-asserted-by":"crossref","unstructured":"Papineni Kishore Salim Roukos Todd Ward and Wei-Jing Zhu. 2002. \u201cBleu: a Method for Automatic Evaluation of Machine Translation.\u201d Paper presented at the Annual Meeting of the Association for Computational Linguistics.","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_24_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jag.2024.104282"},{"key":"e_1_3_2_25_1","doi-asserted-by":"crossref","unstructured":"Qu Bo Xuelong Li Dacheng Tao and Xiaoqiang Lu. 2016. \u201cDeep semantic understanding of high resolution remote sensing image.\u201d 2016 International Conference on Computer Information and Telecommunication Systems (CITS):1\u20135.","DOI":"10.1109\/CITS.2016.7546397"},{"key":"e_1_3_2_26_1","unstructured":"Radford Alec Jong Wook Kim Chris Hallacy Aditya Ramesh Gabriel Goh Sandhini Agarwal Girish Sastry et al. 2021. \u201cLearning Transferable Visual Models From Natural Language Supervision.\u201d Paper presented at the International Conference on Machine Learning."},{"key":"e_1_3_2_27_1","unstructured":"Ramesh Aditya Mikhail Pavlov Gabriel Goh Scott Gray Chelsea Voss Alec Radford Mark Chen and Ilya Sutskever. 2021. Zero-Shot Text-to-Image Generation.\u201d ArXiv abs\/2102.12092."},{"key":"e_1_3_2_28_1","unstructured":"Reed Scott E. Zeynep Akata Xinchen Yan Lajanugen Logeswaran Bernt Schiele and Honglak Lee. 2016. \u201cGenerative Adversarial Text to Image Synthesis.\u201d Paper presented at the International Conference on Machine Learning."},{"key":"e_1_3_2_29_1","doi-asserted-by":"crossref","unstructured":"Rombach Robin Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2022. \u201cHigh-resolution image synthesis with latent diffusion models.\u201d 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR):10684\u201310695.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_30_1","unstructured":"Salimans Tim Ian J. Goodfellow Wojciech Zaremba Vicki Cheung Alec Radford and Xi Chen. 2016. \u201cImproved Techniques for Training GANs.\u201d ArXiv abs\/1606.03498."},{"key":"e_1_3_2_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-024-10363-3"},{"key":"e_1_3_2_32_1","unstructured":"Silva Jo\u00e3o Daniel Joao Magalhaes Devis Tuia and Bruno Martins. 2024. \u201cLarge Language Models for Captioning and Retrieving Remote Sensing Images.\u201d ArXiv abs\/2402.06475."},{"key":"e_1_3_2_33_1","unstructured":"Sohl-Dickstein Jascha Narain Eric A. Weiss Niru Maheswaranathan and Surya Ganguli. 2015. \u201cDeep Unsupervised Learning using Nonequilibrium Thermodynamics.\u201d ArXiv abs\/1503.03585."},{"key":"e_1_3_2_34_1","doi-asserted-by":"crossref","unstructured":"Szegedy Christian Vincent Vanhoucke Sergey Ioffe Jonathon Shlens and Zbigniew Wojna. 2015. \u201cRethinking the Inception Architecture for Computer Vision.\u201d 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR):2818\u20132826.","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_35_1","doi-asserted-by":"crossref","unstructured":"Tang Datao Xiangyong Cao Xingsong Hou Zhongyuan Jiang Junmin Liu and Deyu Meng. 2024. \u201cCrs-diff: Controllable Remote Sensing Image Generation with Diffusion Model.\u201d IEEE Transactions on Geoscience and Remote Sensing.","DOI":"10.1109\/TGRS.2024.3453414"},{"key":"e_1_3_2_36_1","doi-asserted-by":"crossref","unstructured":"Tao Ming Hao Tang Fei Wu Xiaoyuan Jing Bingkun Bao and Changsheng Xu. 2020. \u201cDF-GAN: A Simple and Effective Baseline for Text-to-Image Synthesis.\u201d 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR):16494\u201316504.","DOI":"10.1109\/CVPR52688.2022.01602"},{"key":"e_1_3_2_37_1","doi-asserted-by":"crossref","unstructured":"Vedantam Ramakrishna C. Lawrence Zitnick and Devi Parikh. 2014. \u201cCIDEr: Consensus-based image description evaluation.\u201d 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR):4566\u20134575.","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"e_1_3_2_38_1","doi-asserted-by":"crossref","unstructured":"Wang Jian Fan Li Song Lv Lijun He and Chao Shen. 2025. \u201cPhysically Realizable Adversarial Creating Attack against Vision-based BEV Space 3D Object Detection.\u201d IEEE Transactions on Image Processing.","DOI":"10.1109\/TIP.2025.3526056"},{"key":"e_1_3_2_39_1","doi-asserted-by":"crossref","unstructured":"Wu Zongze Dani Lischinski and Eli Shechtman. 2020. \u201cStyleSpace Analysis: Disentangled Controls for StyleGAN Image Generation.\u201d 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR):12858\u201312867.","DOI":"10.1109\/CVPR46437.2021.01267"},{"key":"e_1_3_2_40_1","doi-asserted-by":"crossref","unstructured":"Wu Yue Jinlong Sheng Hangqi Ding Peiran Gong Hao Li Maoguo Gong Wenping Ma and Qiguang Miao. 2024. \u201cEvolutionary multitasking descriptor optimization for point cloud registration.\u201d IEEE Transactions on Evolutionary Computation.","DOI":"10.1109\/TEVC.2024.3417416"},{"key":"e_1_3_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3323799"},{"key":"e_1_3_2_42_1","doi-asserted-by":"crossref","unstructured":"Xu Tao Pengchuan Zhang Qiuyuan Huang Han Zhang Zhe Gan Xiaolei Huang and Xiaodong He. 2017. \u201cAttnGAN: Fine-Grained Text to Image Generation with Attentional Generative Adversarial Networks.\u201d 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR):1316\u20131324.","DOI":"10.1109\/CVPR.2018.00143"},{"key":"e_1_3_2_43_1","doi-asserted-by":"crossref","unstructured":"Yang Qiaoqiao Zihao Ni and Pengxin Ren. 2022. \u201cMeta captioning: A meta learning based remote sensing image captioning framework.\u201d ISPRS Journal of Photogrammetry and Remote Sensing.","DOI":"10.1016\/j.isprsjprs.2022.02.001"},{"key":"e_1_3_2_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2023.3329578"},{"key":"e_1_3_2_45_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.isprsjprs.2025.01.020"},{"key":"e_1_3_2_46_1","first-page":"1","article-title":"Text-to-Remote-Sensing-Image Generation with Structured Generative Adversarial Networks","volume":"19","author":"Zhao Rui","year":"2022","unstructured":"Zhao, Rui, and Zhenwei Shi. 2022. \u201cText-to-Remote-Sensing-Image Generation with Structured Generative Adversarial Networks.\u201d IEEE Geoscience and Remote Sensing Letters 19:1\u20135.","journal-title":"IEEE Geoscience and Remote Sensing Letters"},{"key":"e_1_3_2_47_1","doi-asserted-by":"crossref","unstructured":"Zhou Yufan Bingchen Liu Yizhe Zhu Xiao Yang Changyou Chen and Jinhui Xu. 2022. \u201cShifted Diffusion for Text-to-image Generation.\u201d 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR):10157\u201310166.","DOI":"10.1109\/CVPR52729.2023.00979"},{"key":"e_1_3_2_48_1","doi-asserted-by":"publisher","DOI":"10.1080\/17538947.2024.2337240"},{"key":"e_1_3_2_49_1","doi-asserted-by":"crossref","unstructured":"Zhu Minfeng Pingbo Pan Wei Chen and Yi Yang. 2019. \u201cDM-GAN: Dynamic Memory Generative Adversarial Networks for Text-To-Image Synthesis.\u201d 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR):5795\u20135803.","DOI":"10.1109\/CVPR.2019.00595"}],"container-title":["International Journal of Digital Earth"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/17538947.2025.2526102","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,25]],"date-time":"2025-08-25T11:28:51Z","timestamp":1756121331000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/17538947.2025.2526102"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7]]},"references-count":48,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,8,25]]}},"alternative-id":["10.1080\/17538947.2025.2526102"],"URL":"https:\/\/doi.org\/10.1080\/17538947.2025.2526102","relation":{},"ISSN":["1753-8947","1753-8955"],"issn-type":[{"type":"print","value":"1753-8947"},{"type":"electronic","value":"1753-8955"}],"subject":[],"published":{"date-parts":[[2025,7]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tjde20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tjde20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2024-11-19","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-06-21","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-07-01","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}],"article-number":"2526102"}}