{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T19:58:58Z","timestamp":1773086338459,"version":"3.50.1"},"reference-count":45,"publisher":"Informa UK Limited","issue":"1","license":[{"start":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T00:00:00Z","timestamp":1726012800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"name":"Aerospace Discipline Education New Engineering","award":["145AXL250004000X"],"award-info":[{"award-number":["145AXL250004000X"]}]}],"content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["International Journal of Digital Earth"],"published-print":{"date-parts":[[2024,12,31]]},"DOI":"10.1080\/17538947.2024.2400988","type":"journal-article","created":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T07:41:03Z","timestamp":1726040463000},"update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":1,"title":["Region-guided transformer for remote sensing image captioning"],"prefix":"10.1080","volume":"17","author":[{"given":"Kai","family":"Zhao","sequence":"first","affiliation":[{"name":"National Key Laboratory of Space Target Awareness, Space Engineering University, Beijing, People's Republic of China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Xiong","sequence":"additional","affiliation":[{"name":"Science and Technology on Complex Electronic System Simulation Laboratory, Space Engineering University, Beijing, People's Republic of China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"301","published-online":{"date-parts":[[2024,9,11]]},"reference":[{"key":"e_1_3_4_2_1","doi-asserted-by":"crossref","unstructured":"Anderson P. X. He C. Buehler D. Teney M. Johnson S. Gould and L. Zhang. 2017. \u201cBottom-Up and Top-Down Attention for Image Captioning and Visual Question Answering.\u201d In 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition 6077\u20136086. Salt Lake City USA.","DOI":"10.1109\/CVPR.2018.00636"},{"key":"e_1_3_4_3_1","unstructured":"Banerjee S. and A. Lavie. 2004. \u201cMeteor: An Automatic Metric for MT Evaluation with High Levels of Correlation with Human Judgments\u201d. In Proceedings of ACL-WMT 65\u201372. Association for Computational Linguistics."},{"key":"e_1_3_4_4_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TGRS.2022.3201474","article-title":"NWPU-captions Dataset and MLCA-net for Remote Sensing Image Captioning","volume":"60","author":"Cheng Q.","year":"2022","unstructured":"Cheng, Q., H. Huang, Y. Xu, Y. Zhou, H. Li, and Z. Wang. 2022. \u201cNWPU-captions Dataset and MLCA-net for Remote Sensing Image Captioning.\u201d IEEE Transactions on Geoscience and Remote Sensing 60:1\u201319.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_4_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2023.3305889"},{"key":"e_1_3_4_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2642953"},{"key":"e_1_3_4_7_1","doi-asserted-by":"crossref","unstructured":"He K. X. Zhang S. Ren and J. Sun. 2015. \u201cDeep Residual Learning for Image Recognition.\u201d In 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 770\u2013778. Las Vegas USA.","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_4_8_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_4_9_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3994"},{"key":"e_1_3_4_10_1","doi-asserted-by":"crossref","unstructured":"Hoxha G. and F. Melgani. 2020. \u201cRemote Sensing Image Captioning with SVM-Based Decoding.\u201d In IGARSS 2020 \u2013 2020 IEEE International Geoscience and Remote Sensing Symposium 6734\u20136737. Waikoloa HI USA.","DOI":"10.1109\/IGARSS39084.2020.9323651"},{"key":"e_1_3_4_11_1","doi-asserted-by":"publisher","DOI":"10.1080\/17538947.2023.2283482"},{"key":"e_1_3_4_12_1","doi-asserted-by":"crossref","unstructured":"Huang L. W. Wang J. Chen and X.-Y. Wei. 2019. \u201cAttention on Attention for Image Captioning.\u201d In Proceedings of the IEEE\/CVF International Conference on Computer Vision 4634\u20134643. Seoul Korea (South).","DOI":"10.1109\/ICCV.2019.00473"},{"key":"e_1_3_4_13_1","unstructured":"Li S. G. Kulkarni T. Berg A. Berg and Y. Choi. 2011. \u201cComposing Simple Image Descriptions Using Web-Scale n-Grams\u201d. In Proceedings of the Fifteenth Conference on Computational Natural Language Learning 220\u2013228. Portland Oregon USA."},{"key":"e_1_3_4_14_1","first-page":"1","article-title":"Recurrent Attention and Semantic Gate for Remote Sensing Image Captioning","volume":"60","author":"Li Y.","year":"2022","unstructured":"Li, Y., X. Zhang, J. Gu, C. Li, X. Wang, X. Tang, and L. Jiao. 2022. \u201cRecurrent Attention and Semantic Gate for Remote Sensing Image Captioning.\u201d IEEE Transactions on Geoscience and Remote Sensing60:1\u201316.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_4_15_1","doi-asserted-by":"crossref","unstructured":"Lin T.-Y. M. Maire S. Belongie J. Hays P. Perona D. Ramanan P. Doll\u00e1r and C. L. Zitnick. 2014. \u201cMicrosoft Coco: Common Objects in Context.\u201d In Computer Vision\u2013ECCV 2014: 13th European Conference Zurich Switzerland September 6\u201312 2014 Proceedings Part V 13 740\u2013755. Springer.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_4_16_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TGRS.2022.3218921","article-title":"Remote Sensing Image Change Captioning with Dual-Branch Transformers: A New Method and a Large Scale Dataset","volume":"60","author":"Liu C.","year":"2022","unstructured":"Liu, C., R. Zhao, H. Chen, Z. Zou, and Z. X. Shi. 2022. \u201cRemote Sensing Image Change Captioning with Dual-Branch Transformers: A New Method and a Large Scale Dataset.\u201d IEEE Transactions on Geoscience and Remote Sensing 60:1\u201320.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_4_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2017.2776321"},{"key":"e_1_3_4_18_1","unstructured":"Mao J. W. Xu Y. Yang J. Wang and A. L. Yuille. 2014. \u201cExplain Images with Multimodal Recurrent Neural Networks.\u201d ArXiv abs\/1410.1090."},{"key":"e_1_3_4_19_1","article-title":"Im2text: Describing Images Using 1 Million Captioned Photographs","volume":"24","author":"Ordonez V.","year":"2011","unstructured":"Ordonez, V., G. Kulkarni, and T. Berg. 2011. \u201cIm2text: Describing Images Using 1 Million Captioned Photographs.\u201d Advances in Neural Information Processing Systems 24","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_4_20_1","doi-asserted-by":"crossref","unstructured":"Papineni K. S. Roukos T. Ward and W.-J. Zhu. 2002. \u201cBleu: A Method for Automatic Evaluation of Machine Translation.\u201d In Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics\u00a0 (ACL) 311\u2013318.","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_4_21_1","doi-asserted-by":"crossref","unstructured":"Pedersoli M. T. Lucas C. Schmid and J. Verbeek. 2017. \u201cAreas of Attention for Image Captioning.\u201d In Proceedings of the IEEE International Conference on Computer Vision 1242\u20131250. Venice Italy. IEEE.","DOI":"10.1109\/ICCV.2017.140"},{"key":"e_1_3_4_22_1","doi-asserted-by":"crossref","unstructured":"Perronnin F. Y. Liu J. S\u00e1nchez and H. Poirier. 2010. \u201cLarge-Scale Image Retrieval with Compressed Fisher Vectors.\u201d In 2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition 3384\u20133391. San Francisco USA.","DOI":"10.1109\/CVPR.2010.5540009"},{"key":"e_1_3_4_23_1","doi-asserted-by":"crossref","unstructured":"Qu B. X. Li D. Tao and X. Lu. 2016. \u201cDeep Semantic Understanding of High Resolution Remote Sensing Image.\u201d In 2016 International Conference on Computer Information and Telecommunication Systems (CITS) 1\u20135. Yunnan China. IEEE.","DOI":"10.1109\/CITS.2016.7546397"},{"key":"e_1_3_4_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"issue":"599","key":"e_1_3_4_25_1","first-page":"6","article-title":"Learning Internal Representations by Error Propagation, Parallel Distributed Processing, Explorations in the Microstructure of Cognition, Ed. De Rumelhart and J.\u00a0Mcclelland. Vol.\u00a01.\u00a01986","volume":"71","author":"Rumelhart D. E.","year":"1986","unstructured":"Rumelhart, D. E., G. E. Hinton, and R. J. Williams. 1986. \u201cLearning Internal Representations by Error Propagation, Parallel Distributed Processing, Explorations in the Microstructure of Cognition, Ed. De Rumelhart and J.\u00a0Mcclelland. Vol.\u00a01.\u00a01986.\u201d Biometrika 71 (599\u2013607): 6.","journal-title":"Biometrika"},{"key":"e_1_3_4_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-020-09294-7"},{"key":"e_1_3_4_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2017.2677464"},{"key":"e_1_3_4_28_1","doi-asserted-by":"crossref","unstructured":"Sivic J. and A. Zisserman. 2003. \u201cVideo Google: A Text Retrieval Approach to Object Matching in Videos.\u201d In Proceedings Ninth IEEE International Conference on Computer Vision Vol. 2. 1470\u20131477. Nice France.","DOI":"10.1109\/ICCV.2003.1238663"},{"key":"e_1_3_4_29_1","doi-asserted-by":"crossref","unstructured":"Sun C. C. Gan and R. Nevatia. 2015. \u201cAutomatic Concept Discovery from Parallel Text and Visual corpora.\u201d In 2015 IEEE International Conference on Computer Vision (ICCV) 2596\u20132604. Santiago Chile.","DOI":"10.1109\/ICCV.2015.298"},{"key":"e_1_3_4_30_1","unstructured":"Sutskever I. O. Vinyals and Q. V. Le. 2014. \u201cSequence to Sequence Learning with Neural Networks.\u201d ArXiv abs\/1409.3215."},{"key":"e_1_3_4_31_1","unstructured":"Vaswani A. 2017. \u201cAttention is All You Need.\u201d arXiv preprint arXiv:1706.03762."},{"key":"e_1_3_4_32_1","doi-asserted-by":"crossref","unstructured":"Vedantam R. C. L. Zitnick and D. Parikh. 2014. \u201cCider: Consensus-Based Image Description Evaluation.\u201d In 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 4566\u20134575. Boston USA.","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"e_1_3_4_33_1","doi-asserted-by":"crossref","unstructured":"Vinyals O. A. Toshev S. Bengio and D. Erhan. 2014. \u201cShow and Tell: A Neural Image Caption Generator.\u201d In 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 3156\u20133164. Boston USA.","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"e_1_3_4_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2022.3222606"},{"key":"e_1_3_4_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.8859"},{"key":"e_1_3_4_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2022.3153636"},{"key":"e_1_3_4_37_1","doi-asserted-by":"crossref","unstructured":"Wei Y. L. Li and S. Geng. 2023. \u201cRemote Sensing Image Captioning using Hire-MLP.\u201d In 2023 4th International Conference on Computer Vision Image and Deep Learning (CVIDL) 109\u2013112. Zhuhai China. IEEE.","DOI":"10.1109\/CVIDL58838.2023.10166056"},{"key":"e_1_3_4_38_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.isprsjprs.2022.02.001"},{"key":"e_1_3_4_39_1","unstructured":"Yang Y. C. Teo H. Daum\u00e9 III and Y. Aloimonos. 2011. \u201cCorpus-Guided Sentence Generation of Natural Images.\u201d In Proceedings of the 2011 Conference on Empirical Methods in Natural Language Processing 444\u2013454. Edinburgh UK: ACL."},{"key":"e_1_3_4_40_1","doi-asserted-by":"crossref","unstructured":"Yao T. Y. Pan Y. Li and T. Mei. 2017. \u201cIncorporating Copying Mechanism in Image Captioning for learning novel objects.\u201d In 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 5263\u20135271. Honolulu Hawaii.","DOI":"10.1109\/CVPR.2017.559"},{"key":"e_1_3_4_41_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TGRS.2022.3224244","article-title":"A Joint-Training Two-Stage Method for Remote Sensing Image Captioning","volume":"60","author":"Ye X.","year":"2022","unstructured":"Ye, X., S. Wang, Y. Gu, J. Wang, R. Wang, B. Hou, F. Giunchiglia, and L. Jiao. 2022. \u201cA Joint-Training Two-Stage Method for Remote Sensing Image Captioning.\u201d IEEE Transactions on Geoscience and Remote Sensing 60:1\u201316.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_4_42_1","doi-asserted-by":"publisher","DOI":"10.3390\/rs11202349"},{"key":"e_1_3_4_43_1","first-page":"1","article-title":"Global Visual Feature and Linguistic State Guided Attention for Remote Sensing Image Captioning","volume":"60","author":"Zhang Z.","year":"2022","unstructured":"Zhang, Z., W. Zhang, M. Yan, X. Gao, K. Fu, and X. Sun. 2022. \u201cGlobal Visual Feature and Linguistic State Guided Attention for Remote Sensing Image Captioning.\u201d IEEE Transactions on Geoscience and Remote Sensing 60:1\u201316.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_4_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.36"},{"key":"e_1_3_4_45_1","doi-asserted-by":"publisher","DOI":"10.3390\/electronics12071547"},{"key":"e_1_3_4_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2021.3135711"}],"container-title":["International Journal of Digital Earth"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/17538947.2024.2400988","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,21]],"date-time":"2025-01-21T18:57:40Z","timestamp":1737485860000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/17538947.2024.2400988"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,11]]},"references-count":45,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2024,12,31]]}},"alternative-id":["10.1080\/17538947.2024.2400988"],"URL":"https:\/\/doi.org\/10.1080\/17538947.2024.2400988","relation":{},"ISSN":["1753-8947","1753-8955"],"issn-type":[{"value":"1753-8947","type":"print"},{"value":"1753-8955","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9,11]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tjde20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tjde20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2024-01-12","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-08-26","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-09-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}],"article-number":"2400988"}}