{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T19:17:03Z","timestamp":1768072623446,"version":"3.49.0"},"reference-count":16,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,7,17]],"date-time":"2022-07-17T00:00:00Z","timestamp":1658016000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,7,17]],"date-time":"2022-07-17T00:00:00Z","timestamp":1658016000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62171332"],"award-info":[{"award-number":["62171332"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,7,17]]},"DOI":"10.1109\/igarss46834.2022.9883252","type":"proceedings-article","created":{"date-parts":[[2022,9,28]],"date-time":"2022-09-28T20:12:24Z","timestamp":1664395944000},"page":"839-842","source":"Crossref","is-referenced-by-count":21,"title":["Multi-Scale Interactive Transformer for Remote Sensing Cross-Modal Image-Text Retrieval"],"prefix":"10.1109","author":[{"given":"Yijing","family":"Wang","sequence":"first","affiliation":[{"name":"School of Artificial Intelligence, Xidian University,Xi&#x0027;an,Shaanxi Province,China,710071"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jingjing","family":"Ma","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Xidian University,Xi&#x0027;an,Shaanxi Province,China,710071"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingteng","family":"Li","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Xidian University,Xi&#x0027;an,Shaanxi Province,China,710071"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xu","family":"Tang","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Xidian University,Xi&#x0027;an,Shaanxi Province,China,710071"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiao","family":"Han","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Xidian University,Xi&#x0027;an,Shaanxi Province,China,710071"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Licheng","family":"Jiao","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Xidian University,Xi&#x0027;an,Shaanxi Province,China,710071"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref11","first-page":"5998","article-title":"Attention is all you need","author":"ashish","year":"2017","journal-title":"Ad-vances in Neural Information Processing Systems"},{"key":"ref12","first-page":"1251","article-title":"Xception: Deep learning with depth-wise separable convolutions","author":"chollet","year":"2017","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref13","article-title":"Multi -scale context aggregation by dilated convolutions","author":"yu","year":"2015","journal-title":"ar Xiv preprint"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2017.2776321"},{"key":"ref15","article-title":"Vilt: Vision-and-language transformer without con-volution or region supervision","author":"kim","year":"2021","journal-title":"ar Xiv preprint"},{"key":"ref16","article-title":"Vse++: Improving visual-semantic embeddings with hard negatives","author":"fartash","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref4","first-page":"1","article-title":"Meta-hashing for remote sensing image retrieval","author":"yang","year":"2021","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2020.3007533"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2021.3070872"},{"key":"ref5","first-page":"1","article-title":"A lightweight multi-scale crossmodal text-image retrieval method in remote sensing","author":"yuan","year":"2021","journal-title":"IEEE transactions on geo-science and remote sensing"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"ref7","first-page":"1","article-title":"Exploring a fine-grained multiscale method for cross-modal re-mote sensing image retrieval","author":"yuan","year":"2021","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.3390\/rs10081243"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/78.650093"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2015.2429137"}],"event":{"name":"IGARSS 2022 - 2022 IEEE International Geoscience and Remote Sensing Symposium","location":"Kuala Lumpur, Malaysia","start":{"date-parts":[[2022,7,17]]},"end":{"date-parts":[[2022,7,22]]}},"container-title":["IGARSS 2022 - 2022 IEEE International Geoscience and Remote Sensing Symposium"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9883023\/9883024\/09883252.pdf?arnumber=9883252","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,14]],"date-time":"2022-10-14T20:53:18Z","timestamp":1665780798000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9883252\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,17]]},"references-count":16,"URL":"https:\/\/doi.org\/10.1109\/igarss46834.2022.9883252","relation":{},"subject":[],"published":{"date-parts":[[2022,7,17]]}}}