{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,13]],"date-time":"2026-06-13T16:00:55Z","timestamp":1781366455494,"version":"3.54.1"},"reference-count":261,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Open Project of Xiangjiang Laboratory","award":["22XJ03007"],"award-info":[{"award-number":["22XJ03007"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["42171376"],"award-info":[{"award-number":["42171376"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["41771458"],"award-info":[{"award-number":["41771458"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100019092","name":"Natural Science Foundation of Hunan for Distinguished Young Scholars","doi-asserted-by":"publisher","award":["2022JJ10072"],"award-info":[{"award-number":["2022JJ10072"]}],"id":[{"id":"10.13039\/501100019092","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004735","name":"Natural Science Foundation of Hunan","doi-asserted-by":"publisher","award":["2021JJ30815"],"award-info":[{"award-number":["2021JJ30815"]}],"id":[{"id":"10.13039\/501100004735","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100015397","name":"Inner Mongolia Science and Technology Plan","doi-asserted-by":"publisher","award":["2022YFSJ0014"],"award-info":[{"award-number":["2022YFSJ0014"]}],"id":[{"id":"10.13039\/501100015397","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002822","name":"High-Performance Computing Center of Central South University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002822","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Geosci. Remote Sensing"],"published-print":{"date-parts":[[2023]]},"DOI":"10.1109\/tgrs.2023.3276853","type":"journal-article","created":{"date-parts":[[2023,5,16]],"date-time":"2023-05-16T19:45:48Z","timestamp":1684266348000},"page":"1-26","source":"Crossref","is-referenced-by-count":95,"title":["Self-Supervised Remote Sensing Feature Learning: Learning Paradigms, Challenges, and Future Works"],"prefix":"10.1109","volume":"61","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0071-310X","authenticated-orcid":false,"given":"Chao","family":"Tao","sequence":"first","affiliation":[{"name":"School of Geosciences and Info-Physics, Central South University, Changsha, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7948-579X","authenticated-orcid":false,"given":"Ji","family":"Qi","sequence":"additional","affiliation":[{"name":"School of Geosciences and Info-Physics, Central South University, Changsha, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mingning","family":"Guo","sequence":"additional","affiliation":[{"name":"School of Geosciences and Info-Physics, Central South University, Changsha, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0485-4965","authenticated-orcid":false,"given":"Qing","family":"Zhu","sequence":"additional","affiliation":[{"name":"Faculty of Geosciences and Environmental Engineering, Southwest Jiaotong University, Chengdu, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1173-6593","authenticated-orcid":false,"given":"Haifeng","family":"Li","sequence":"additional","affiliation":[{"name":"School of Geosciences and Info-Physics, Central South University, Changsha, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref57","article-title":"A large-scale study of representation learning with the visual task adaptation benchmark","author":"zhai","year":"2019","journal-title":"arXiv 1910 04867"},{"key":"ref207","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3174651"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00872"},{"key":"ref208","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00803"},{"key":"ref59","article-title":"Self-supervised learning is more robust to dataset imbalance","author":"liu","year":"2021","journal-title":"Proc Adv Neural Inf Process Syst Workshop"},{"key":"ref205","article-title":"Learning crop type mapping from regional label proportions in large-scale SAR and optical imagery","author":"la rosa","year":"2022","journal-title":"arXiv 2208 11607"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1038\/s43017-019-0005-6"},{"key":"ref206","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2021.3063335"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-5446"},{"key":"ref203","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.07.015"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00128"},{"key":"ref204","doi-asserted-by":"publisher","DOI":"10.3390\/rs13214418"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00649"},{"key":"ref201","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01240"},{"key":"ref54","first-page":"1","article-title":"SuperGLUE: A stickier benchmark for general-purpose language understanding systems","volume":"32","author":"wang","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst (NIPS)"},{"key":"ref202","article-title":"Mugs: A multi-granular self-supervised learning framework","author":"zhou","year":"2022","journal-title":"arXiv 2203 14415"},{"key":"ref209","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01002"},{"key":"ref210","doi-asserted-by":"publisher","DOI":"10.1016\/j.isprsjprs.2022.02.021"},{"key":"ref211","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS46834.2022.9883441"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3084358"},{"key":"ref50","article-title":"Domain generalization: A survey","author":"zhou","year":"2021","journal-title":"arXiv 2103 02503"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2013.07.007"},{"key":"ref218","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.73"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.5194\/essd-12-1217-2020"},{"key":"ref219","article-title":"Audio-visual contrastive learning for self-supervised action recognition","author":"lan","year":"2022","journal-title":"arXiv preprint arXiv 2204 13386"},{"key":"ref48","article-title":"Towards out-of-distribution generalization: A survey","author":"shen","year":"2021","journal-title":"arXiv 2108 13624"},{"key":"ref216","first-page":"10944","article-title":"What makes multi-modal learning better than single (provably)","volume":"34","author":"huang","year":"2021","journal-title":"Proc Adv Neural Inf Process Syst (NIPS)"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/MGRS.2016.2548504"},{"key":"ref217","first-page":"24206","article-title":"VATT: Transformers for multimodal self-supervised learning from raw video, audio and text","volume":"34","author":"akbari","year":"2021","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/MGRS.2022.3198244"},{"key":"ref214","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2021.108397"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2021.3134634"},{"key":"ref215","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2022.3173419"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1016\/j.scib.2019.03.002"},{"key":"ref212","doi-asserted-by":"publisher","DOI":"10.3390\/rs12111868"},{"key":"ref43","doi-asserted-by":"crossref","first-page":"2607","DOI":"10.1080\/01431161.2012.748992","article-title":"Finer resolution observation and monitoring of global land cover: First mapping results with Landsat TM and ETM+ data","volume":"34","author":"gong","year":"2013","journal-title":"Int J Remote Sens"},{"key":"ref213","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-68787-8_42"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3183326"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2017.2675998"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2017.2685945"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00418"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1090\/jams\/852"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.rse.2020.111716"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3201688"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2020.3018591"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58526-6_13"},{"key":"ref221","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00990"},{"key":"ref101","first-page":"843","article-title":"Unsupervised learning of video representations using LSTMs","author":"srivastava","year":"2015","journal-title":"Proc Int Conf Mach Learn (ICML)"},{"key":"ref222","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58621-8_45"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2992393"},{"key":"ref220","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.202"},{"key":"ref35","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"brown","year":"2020","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref34","article-title":"Language models are unsupervised multitask learners","author":"radford","year":"2019"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00737"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00202"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2017.2702596"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2021.3140154"},{"key":"ref33","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2019","journal-title":"Proc Conf North Amer Chapter Assoc Comput Linguistics (NAACL)"},{"key":"ref32","article-title":"Improving language understanding by generative pre-training","author":"radford","year":"2018"},{"key":"ref39","first-page":"857","article-title":"Self-supervised learning: Generative or contrastive","volume":"35","author":"liu","year":"2023","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"ref38","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume":"139","author":"radford","year":"2021","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2017.2651639"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2017.2734697"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2018.2856929"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2017.2748160"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1080\/01431161.2011.608740"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2015.2482520"},{"key":"ref21","first-page":"3371","article-title":"Stacked denoising autoencoders: Learning useful representations in a deep network with a local denoising criterion","volume":"11","author":"vincent","year":"2010","journal-title":"J Mach Learn Res"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3114203"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS39084.2020.9323483"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.3390\/jimaging7020030"},{"key":"ref200","article-title":"Prototypical contrastive learning of unsupervised representations","author":"li","year":"2021","journal-title":"Proc Int Conf Learn Represent (ICLR)"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00480"},{"key":"ref249","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"van der maaten","year":"2008","journal-title":"J Mach Learn Res"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00822"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.19"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2021.3057768"},{"key":"ref247","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00020"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3194732"},{"key":"ref127","article-title":"Equivariant self-supervised learning: Encouraging equivariance in representations","author":"dangovski","year":"2022","journal-title":"Proc Int Conf Learn Represent (ICLR)"},{"key":"ref248","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10590-1_53"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00123"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3184080"},{"key":"ref245","article-title":"Self-supervised spatiotemporal feature learning via video rotation prediction","author":"jing","year":"2018","journal-title":"arXiv 1811 11387"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-020-05687-9"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3027776"},{"key":"ref246","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01228-7"},{"key":"ref93","first-page":"20026","article-title":"Adversarial masking for self-supervised learning","author":"shi","year":"2022","journal-title":"Proc 39th Int Conf Mach Learn"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46487-9_40"},{"key":"ref254","doi-asserted-by":"publisher","DOI":"10.5194\/isprsannals-I-3-293-2012"},{"key":"ref92","article-title":"ConvMAE: Masked convolution meets masked autoencoders","author":"gao","year":"2022","journal-title":"arXiv 2205 03892"},{"key":"ref134","first-page":"577","article-title":"Learning representations for automatic colorization","volume":"9908","author":"larsson","year":"2016","journal-title":"Proc Eur Conf Comput Vis (ECCV)"},{"key":"ref255","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2018.00031"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00943"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3104179"},{"key":"ref252","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3117983"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01426"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS47720.2021.9553408"},{"key":"ref253","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2773199"},{"key":"ref250","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2019.2918242"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS.2019.8899169"},{"key":"ref251","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2016.2645610"},{"key":"ref91","article-title":"Context autoencoder for self-supervised representation learning","author":"chen","year":"2022","journal-title":"arXiv 2202 03026"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref89","article-title":"An image is worth 16&#x00D7;16 words: Transformers for image recognition at scale","author":"dosovitskiy","year":"2021","journal-title":"Proc Int Conf Learn Represent (ICLR)"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR48806.2021.9413112"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3157917"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1109\/NEUREL.2018.8587001"},{"key":"ref258","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2006.881199"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3190466"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.23919\/ELMAR.2018.8534634"},{"key":"ref259","doi-asserted-by":"publisher","DOI":"10.1126\/science.1127647"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.76"},{"key":"ref256","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref87","first-page":"4","article-title":"Self-supervised feature learning for semantic segmentation of overhead imagery","volume":"1","author":"singh","year":"2018","journal-title":"Proc Brit Mach Vis Conf (BMVC)"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.96"},{"key":"ref257","doi-asserted-by":"publisher","DOI":"10.1109\/MGRS.2021.3089174"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.278"},{"key":"ref144","first-page":"6827","article-title":"What makes for good views for contrastive learning?","author":"tian","year":"2020","journal-title":"Proc 34th Int Conf Neural Inf Process Syst"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1016\/j.isprsjprs.2016.02.013"},{"key":"ref145","first-page":"9929","article-title":"Understanding contrastive representation learning through alignment and uniformity on the hypersphere","author":"wang","year":"2020","journal-title":"Proc 37th Int Conf Mach Learn"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073659"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.3390\/technologies9010002"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2021.3090418"},{"key":"ref143","article-title":"Towards the generalization of contrastive self-supervised learning","author":"huang","year":"2021","journal-title":"arXiv 2111 00743"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3225663"},{"key":"ref261","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01906"},{"key":"ref141","first-page":"9960","article-title":"Self-supervised learning through the eyes of a child","volume":"33","author":"orhan","year":"2020","journal-title":"Proc Adv Neural Inf Process Syst (NIPS)"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2021.3064429"},{"key":"ref260","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3176603"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2021.3137313"},{"key":"ref108","article-title":"SITS-former: A pre-trained spatio-spectral-temporal representation model for Sentinel-2 time series classification","volume":"106","author":"yuan","year":"2022","journal-title":"Int J Appl Earth Observ Geoinf"},{"key":"ref229","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2022.3204888"},{"key":"ref78","article-title":"Self-supervised hyperspectral image restoration using separable image prior","author":"imamura","year":"2019","journal-title":"arXiv 1907 00651"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.167"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2020.3036602"},{"key":"ref227","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2021.3109957"},{"key":"ref107","article-title":"Masked autoencoders as spatiotemporal learners","author":"feichtenhofer","year":"2022","journal-title":"arXiv 2205 09113"},{"key":"ref228","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2022.3195259"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390294"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-81462-5_61"},{"key":"ref225","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3248871"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2020.3038420"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2020.3047677"},{"key":"ref226","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS46834.2022.9883983"},{"key":"ref77","article-title":"Blind SAR image despeckling using self-supervised dense dilated convolutional neural network","author":"yuan","year":"2019","journal-title":"arXiv 1908 01608"},{"key":"ref102","first-page":"64","article-title":"Unsupervised learning for physical interaction through video prediction","author":"finn","year":"2016","journal-title":"Proc 30th Int Conf Neural Inf Process Syst"},{"key":"ref223","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.700"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2021.3065461"},{"key":"ref103","article-title":"Decomposing motion and content for natural video sequence prediction","author":"villegas","year":"2017","journal-title":"arXiv 1706 08033"},{"key":"ref224","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3136287"},{"key":"ref71","article-title":"Self-supervised audiovisual representation learning for remote sensing data","author":"heidler","year":"2021","journal-title":"arXiv 2108 00688"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2873701"},{"key":"ref232","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/LGRS.2021.3109345","article-title":"Contrastive multiview coding with electro-optics for SAR semantic segmentation","volume":"19","author":"cha","year":"2022","journal-title":"IEEE Geosci Remote Sens Lett"},{"key":"ref70","article-title":"SSL4EO-s12: A large-scale multi-modal, multi-temporal dataset for self-supervised learning in Earth observation","author":"wang","year":"2022","journal-title":"arXiv 2211 07044"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2018.00092"},{"key":"ref233","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS47720.2021.9553741"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00537"},{"key":"ref230","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2021.3089453"},{"key":"ref72","article-title":"TOV: The original vision model for optical remote sensing image understanding via self-supervised learning","author":"tao","year":"2022","journal-title":"arXiv 2204 04716"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46466-4_5"},{"key":"ref231","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2021.3128072"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1038\/514434c"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01243"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.5194\/isprs-annals-IV-2-W7-153-2019"},{"key":"ref117","article-title":"Unsupervised representation learning by predicting image rotations","author":"gidaris","year":"2018","journal-title":"Proc Int Conf Learn Represent (ICLR)"},{"key":"ref238","article-title":"VisualBERT: A simple and performant baseline for vision and language","author":"harold li","year":"2019","journal-title":"arXiv 1908 03557"},{"key":"ref69","first-page":"9394","article-title":"Seasonal contrast: Unsupervised pre-training from uncurated remote sensing data","author":"ma nas","year":"2021","journal-title":"Proc IEEE\/CVF Int Conf Comput Vis (ICCV)"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01061"},{"key":"ref239","first-page":"13","article-title":"ViLBERT: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks","volume":"32","author":"lu","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst (NIPS)"},{"key":"ref64","article-title":"Aerial scene parsing: From tile-level scene classification to pixel-wise semantic labeling","author":"long","year":"2022","journal-title":"arXiv 2201 01953"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-59710-8_14"},{"key":"ref236","article-title":"Deep unsupervised contrastive hashing for large-scale cross-modal text-image retrieval in remote sensing","author":"mikriukov","year":"2022","journal-title":"arXiv 2201 08125"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00646"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01136"},{"key":"ref237","first-page":"4904","article-title":"Scaling up visual and vision-language representation learning with noisy text supervision","author":"jia","year":"2021","journal-title":"Proc 38th Int Conf Mach Learn"},{"key":"ref66","article-title":"SeasoNet: A seasonal scene classification, segmentation and retrieval dataset for satellite imagery over Germany","author":"ko\u00dfmann","year":"2022","journal-title":"arXiv 2207 09507"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00975"},{"key":"ref234","article-title":"Self-supervised remote sensing images change detection at pixel-level","author":"chen","year":"2021","journal-title":"arXiv 2105 08501"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS.2019.8900532"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00201"},{"key":"ref235","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP46576.2022.9897500"},{"key":"ref60","article-title":"Self-supervised pretraining of visual features in the wild","author":"goyal","year":"2021","journal-title":"arXiv 2103 01988"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.3390\/rs12203276"},{"key":"ref243","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053174"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.3390\/rs13163122"},{"key":"ref244","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_48"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.3390\/rs9070725"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2973390"},{"key":"ref241","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3163706"},{"key":"ref61","article-title":"Florence: A new foundation model for computer vision","author":"yuan","year":"2021","journal-title":"arXiv 2111 11432"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2021.3075244"},{"key":"ref242","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS46834.2022.9883242"},{"key":"ref240","article-title":"Contrastive learning of medical visual representations from paired images and text","author":"zhang","year":"2020","journal-title":"arXiv 2010 00747"},{"key":"ref168","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3211472"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2021.3131152"},{"key":"ref170","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3177770"},{"key":"ref177","doi-asserted-by":"publisher","DOI":"10.3390\/rs13204158"},{"key":"ref178","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00087"},{"key":"ref175","doi-asserted-by":"publisher","DOI":"10.1109\/DICTA52665.2021.9647061"},{"key":"ref176","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00129"},{"key":"ref173","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2021.3056883"},{"key":"ref174","doi-asserted-by":"crossref","first-page":"9224","DOI":"10.1109\/TGRS.2020.3048967","article-title":"A mutual information-based self-supervised learning model for PolSAR land cover classification","volume":"59","author":"ren","year":"2021","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"ref171","first-page":"28864","article-title":"Unsupervised object-level representation learning from scene images","volume":"34","author":"xie","year":"2021","journal-title":"Proc Adv Neural Inf Process Syst (NIPS)"},{"key":"ref172","article-title":"Self-supervised contrastive learning for irrigation detection in satellite imagery","author":"agastya","year":"2021","journal-title":"arXiv 2108 05484"},{"key":"ref179","doi-asserted-by":"publisher","DOI":"10.1007\/s11431-021-1989-9"},{"key":"ref180","doi-asserted-by":"publisher","DOI":"10.3390\/rs13163275"},{"key":"ref181","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2021.3069799"},{"key":"ref188","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.556"},{"key":"ref189","first-page":"478","article-title":"Unsupervised deep embedding for clustering analysis","author":"xie","year":"2016","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref186","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2022.3222836"},{"key":"ref187","article-title":"CliqueCNN: Deep unsupervised exemplar learning","volume":"29","author":"bautista","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst (NIPS)"},{"key":"ref184","doi-asserted-by":"publisher","DOI":"10.1080\/01431161.2022.2042617"},{"key":"ref185","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00119"},{"key":"ref182","article-title":"Evaluating self and semi-supervised methods for remote sensing segmentation tasks","author":"patel","year":"2021","journal-title":"arXiv 2111 10079"},{"key":"ref183","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00148"},{"key":"ref148","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"ref149","article-title":"Representation learning with contrastive predictive coding","author":"van den oord","year":"2018","journal-title":"arXiv 1807 03748"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.202"},{"key":"ref147","first-page":"207","article-title":"Distance metric learning for large margin nearest neighbor classification","volume":"10","author":"weinberger","year":"2009","journal-title":"J Mach Learn Res"},{"key":"ref155","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","volume":"119","author":"chen","year":"2020","journal-title":"Proc 37th Int Conf Mach Learn (PMLR)"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"ref153","first-page":"22243","article-title":"Big self-supervised models are strong semi-supervised learners","volume":"33","author":"chen","year":"2020","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref154","first-page":"15509","article-title":"Learning representations by maximizing mutual information across views","volume":"32","author":"bachman","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst (NIPS)"},{"key":"ref151","article-title":"Learning deep representations by mutual information estimation and maximization","author":"hjelm","year":"2019","journal-title":"Proc Int Conf Learn Representations (ICLR)"},{"key":"ref152","article-title":"Improved baselines with momentum contrastive learning","author":"chen","year":"2020","journal-title":"arXiv 2003 04297"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00393"},{"key":"ref159","first-page":"21271","article-title":"Bootstrap your own latent a new approach to self-supervised learning","author":"grill","year":"2020","journal-title":"Proc 34th Int Conf Neural Inf Process Syst"},{"key":"ref157","first-page":"21798","article-title":"Hard negative mixing for contrastive learning","author":"kalantidis","year":"2020","journal-title":"Proc 34th Int Conf Neural Inf Process Syst"},{"key":"ref158","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"ref166","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2022.3185088"},{"key":"ref167","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3147513"},{"key":"ref164","doi-asserted-by":"publisher","DOI":"10.1049\/ell2.12108"},{"key":"ref165","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2020.3007029"},{"key":"ref162","first-page":"10268","article-title":"Understanding self-supervised learning dynamics without contrastive pairs","author":"tian","year":"2021","journal-title":"Proc 38th Int Conf Mach Learn"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013967"},{"key":"ref160","article-title":"Continual barlow twins: Continual self-supervised learning for remote sensing semantic segmentation","author":"marsocci","year":"2022","journal-title":"arXiv 2205 11319"},{"key":"ref161","first-page":"12310","article-title":"Barlow twins: Self-supervised learning via redundancy reduction","author":"zbontar","year":"2021","journal-title":"Proc 38th Int Conf Mach Learn"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.50"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.3390\/rs13224712"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2021.3070368"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.97"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2020.2992929"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.rse.2021.112830"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2010.2055033"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2013.2241444"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2015.2478379"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2017.2778749"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MGRS.2017.2762307"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/MGRS.2016.2540798"},{"key":"ref191","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS46834.2022.9884735"},{"key":"ref192","doi-asserted-by":"crossref","first-page":"139","DOI":"10.1007\/978-3-030-01264-9_9","article-title":"Deep clustering for unsupervised learning of visual features","volume":"11218","author":"caron","year":"2018","journal-title":"Computer Vision&#x2014;ECCV 2018"},{"key":"ref190","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2022.3178168"},{"key":"ref199","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i10.17037"},{"key":"ref197","first-page":"9912","article-title":"Unsupervised learning of visual features by contrasting cluster assignments","volume":"33","author":"caron","year":"2020","journal-title":"Proc Adv Neural Inf Process Syst (NIPS)"},{"key":"ref198","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"ref195","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00610"},{"key":"ref196","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS39084.2020.9323294"},{"key":"ref193","first-page":"268","article-title":"SCAN: Learning to classify images without labels","volume":"12355","author":"gansbeke","year":"2020","journal-title":"Proc ECCV"},{"key":"ref194","article-title":"Self-labelling via simultaneous clustering and representation learning","author":"ym","year":"2020","journal-title":"Proc Int Conf Learn Represent (ICLR)"}],"container-title":["IEEE Transactions on Geoscience and Remote Sensing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/36\/10006360\/10126079.pdf?arnumber=10126079","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,19]],"date-time":"2023-06-19T18:04:18Z","timestamp":1687197858000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10126079\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"references-count":261,"URL":"https:\/\/doi.org\/10.1109\/tgrs.2023.3276853","relation":{},"ISSN":["0196-2892","1558-0644"],"issn-type":[{"value":"0196-2892","type":"print"},{"value":"1558-0644","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]}}}