{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T16:23:42Z","timestamp":1773246222250,"version":"3.50.1"},"reference-count":72,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Key Scientific Technological Innovation Research Project of the Ministry of Education Joint Funds of the National Natural Science Foundation of China","award":["U22B2054"],"award-info":[{"award-number":["U22B2054"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62076192"],"award-info":[{"award-number":["62076192"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62276199"],"award-info":[{"award-number":["62276199"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"111 Project through the Program for Cheung Kong Scholars and Innovative Research Team in University","award":["IRT 15R53"],"award-info":[{"award-number":["IRT 15R53"]}]},{"DOI":"10.13039\/501100001809","name":"Science and Technology Innovation Project from the Chinese Ministry of Education","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Geosci. Remote Sensing"],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/tgrs.2024.3475633","type":"journal-article","created":{"date-parts":[[2024,10,7]],"date-time":"2024-10-07T17:44:39Z","timestamp":1728323079000},"page":"1-12","source":"Crossref","is-referenced-by-count":23,"title":["TrTr-CMR: Cross-Modal Reasoning Dual Transformer for Remote Sensing Image Captioning"],"prefix":"10.1109","volume":"62","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-9149-3389","authenticated-orcid":false,"given":"Yinan","family":"Wu","sequence":"first","affiliation":[{"name":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, International Research Center for Intelligent Perception and Computation, Joint International Research Laboratory of Intelligent Perception and Computation, School of Artificial Intelligence, Xidian University, Xi&#x2019;an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6130-2518","authenticated-orcid":false,"given":"Lingling","family":"Li","sequence":"additional","affiliation":[{"name":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, International Research Center for Intelligent Perception and Computation, Joint International Research Laboratory of Intelligent Perception and Computation, School of Artificial Intelligence, Xidian University, Xi&#x2019;an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3354-9617","authenticated-orcid":false,"given":"Licheng","family":"Jiao","sequence":"additional","affiliation":[{"name":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, International Research Center for Intelligent Perception and Computation, Joint International Research Laboratory of Intelligent Perception and Computation, School of Artificial Intelligence, Xidian University, Xi&#x2019;an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5669-9354","authenticated-orcid":false,"given":"Fang","family":"Liu","sequence":"additional","affiliation":[{"name":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, International Research Center for Intelligent Perception and Computation, Joint International Research Laboratory of Intelligent Perception and Computation, School of Artificial Intelligence, Xidian University, Xi&#x2019;an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8780-5455","authenticated-orcid":false,"given":"Xu","family":"Liu","sequence":"additional","affiliation":[{"name":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, International Research Center for Intelligent Perception and Computation, Joint International Research Laboratory of Intelligent Perception and Computation, School of Artificial Intelligence, Xidian University, Xi&#x2019;an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4796-5737","authenticated-orcid":false,"given":"Shuyuan","family":"Yang","sequence":"additional","affiliation":[{"name":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, International Research Center for Intelligent Perception and Computation, Joint International Research Laboratory of Intelligent Perception and Computation, School of Artificial Intelligence, Xidian University, Xi&#x2019;an, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2023.3344116"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3326949"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2020.3016820"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.3390\/rs15225314"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3187015"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2023.3339970"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2023.3325997"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/MGRS.2022.3145854"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CITS.2016.7546397"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2017.2776321"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2914351"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2019.2895693"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3323799"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN48605.2020.9207381"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2020.3044054"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2022.3222606"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.isprsjprs.2022.02.001"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2022.3150957"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15561-1_2"},{"key":"ref21","first-page":"1","article-title":"Im2Text: Describing images using 1 million captioned photographs","volume-title":"Proc. 24th Int. Conf. Neural Inf. Process. Syst.","volume":"24","author":"Ordonez"},{"key":"ref22","first-page":"1","article-title":"Deep fragment embeddings for bidirectional image sentence mapping","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"27","author":"Karpathy"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.162"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00188"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.5555\/3045118.3045336"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6898"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.667"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i2.16258"},{"key":"ref30","article-title":"CPTR: Full transformer network for image captioning","author":"Liu","year":"2021","journal-title":"arXiv:2101.10804"},{"key":"ref31","first-page":"1","article-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Dosovitskiy"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.7005"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00277"},{"key":"ref34","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Brown"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3218921"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2020.2988782"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2023.3250471"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2023.3321752"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2024.3404604"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2017.2677464"},{"key":"ref41","article-title":"Very deep convolutional networks for large-scale image recognition","author":"Simonyan","year":"2014","journal-title":"arXiv:1409.1556"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1023\/b:visi.0000029664.99615.94"},{"key":"ref43","first-page":"1470","article-title":"Video Google: A text retrieval approach to object matching in videos","volume-title":"Proc. 9th IEEE Int. Conf. Comput. Vis.","author":"Zisserman"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5540009"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2011.235"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS.2019.8900503"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2020.2980933"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2020.3010106"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2021.3070383"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109893"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.105920"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2021.3135711"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3359316"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2023.3328181"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3385500"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2016.2577031"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref59","first-page":"1","article-title":"DeViSE: A deep visual-semantic embedding model","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"26","author":"Frome"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2019.2893772"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2019.2951636"},{"key":"ref62","volume-title":"The Illustrated Image Captioning Using Transformers","author":"Kumar","year":"2022"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-018-5856-1"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1145\/1869790.1869829"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2014.2357078"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"ref67","first-page":"65","article-title":"METEOR: An automatic metric for MT evaluation with improved correlation with human judgments","volume-title":"Proc. 2nd Workshop Stat. Mach. Transl. StatMT","author":"Banerjee"},{"key":"ref68","first-page":"74","article-title":"ROUGE: A package for automatic evaluation of summaries","volume-title":"Proc. Text Summarization Branches Out","author":"Lin"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"ref70","article-title":"Decoupled weight decay regularization","author":"Loshchilov","year":"2017","journal-title":"arXiv:1711.05101"},{"issue":"8","key":"ref71","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI Blog"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/MGRS.2024.3383473"}],"container-title":["IEEE Transactions on Geoscience and Remote Sensing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/36\/10354519\/10706929.pdf?arnumber=10706929","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T00:40:29Z","timestamp":1732668029000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10706929\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":72,"URL":"https:\/\/doi.org\/10.1109\/tgrs.2024.3475633","relation":{},"ISSN":["0196-2892","1558-0644"],"issn-type":[{"value":"0196-2892","type":"print"},{"value":"1558-0644","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]}}}