{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T16:15:10Z","timestamp":1774714510181,"version":"3.50.1"},"reference-count":67,"publisher":"MDPI AG","issue":"16","license":[{"start":{"date-parts":[[2024,8,21]],"date-time":"2024-08-21T00:00:00Z","timestamp":1724198400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Key Research and Development Plan Project of Heilongjiang","award":["JD2023SJ19"],"award-info":[{"award-number":["JD2023SJ19"]}]},{"name":"Key Research and Development Plan Project of Heilongjiang","award":["LH2023F034"],"award-info":[{"award-number":["LH2023F034"]}]},{"name":"Key Research and Development Plan Project of Heilongjiang","award":["G2022012010L"],"award-info":[{"award-number":["G2022012010L"]}]},{"name":"Key Research and Development Plan Project of Heilongjiang","award":["HJK2024B002"],"award-info":[{"award-number":["HJK2024B002"]}]},{"name":"Key Research and Development Plan Project of Heilongjiang","award":["GZ20220123"],"award-info":[{"award-number":["GZ20220123"]}]},{"name":"Key Research and Development Plan Project of Heilongjiang","award":["24K15019"],"award-info":[{"award-number":["24K15019"]}]},{"name":"Natural Science Foundation of Heilongjiang Province","award":["JD2023SJ19"],"award-info":[{"award-number":["JD2023SJ19"]}]},{"name":"Natural Science Foundation of Heilongjiang Province","award":["LH2023F034"],"award-info":[{"award-number":["LH2023F034"]}]},{"name":"Natural Science Foundation of Heilongjiang Province","award":["G2022012010L"],"award-info":[{"award-number":["G2022012010L"]}]},{"name":"Natural Science Foundation of Heilongjiang Province","award":["HJK2024B002"],"award-info":[{"award-number":["HJK2024B002"]}]},{"name":"Natural Science Foundation of Heilongjiang Province","award":["GZ20220123"],"award-info":[{"award-number":["GZ20220123"]}]},{"name":"Natural 
Science Foundation of Heilongjiang Province","award":["24K15019"],"award-info":[{"award-number":["24K15019"]}]},{"name":"high-end foreign expert introduction program","award":["JD2023SJ19"],"award-info":[{"award-number":["JD2023SJ19"]}]},{"name":"high-end foreign expert introduction program","award":["LH2023F034"],"award-info":[{"award-number":["LH2023F034"]}]},{"name":"high-end foreign expert introduction program","award":["G2022012010L"],"award-info":[{"award-number":["G2022012010L"]}]},{"name":"high-end foreign expert introduction program","award":["HJK2024B002"],"award-info":[{"award-number":["HJK2024B002"]}]},{"name":"high-end foreign expert introduction program","award":["GZ20220123"],"award-info":[{"award-number":["GZ20220123"]}]},{"name":"high-end foreign expert introduction program","award":["24K15019"],"award-info":[{"award-number":["24K15019"]}]},{"name":"Science and Technology Project of Heilongjiang Provincial Department of Transportation","award":["JD2023SJ19"],"award-info":[{"award-number":["JD2023SJ19"]}]},{"name":"Science and Technology Project of Heilongjiang Provincial Department of Transportation","award":["LH2023F034"],"award-info":[{"award-number":["LH2023F034"]}]},{"name":"Science and Technology Project of Heilongjiang Provincial Department of Transportation","award":["G2022012010L"],"award-info":[{"award-number":["G2022012010L"]}]},{"name":"Science and Technology Project of Heilongjiang Provincial Department of Transportation","award":["HJK2024B002"],"award-info":[{"award-number":["HJK2024B002"]}]},{"name":"Science and Technology Project of Heilongjiang Provincial Department of Transportation","award":["GZ20220123"],"award-info":[{"award-number":["GZ20220123"]}]},{"name":"Science and Technology Project of Heilongjiang Provincial Department of Transportation","award":["24K15019"],"award-info":[{"award-number":["24K15019"]}]},{"name":"Key Research and Development Program Guidance Project of 
Heilongjiang","award":["JD2023SJ19"],"award-info":[{"award-number":["JD2023SJ19"]}]},{"name":"Key Research and Development Program Guidance Project of Heilongjiang","award":["LH2023F034"],"award-info":[{"award-number":["LH2023F034"]}]},{"name":"Key Research and Development Program Guidance Project of Heilongjiang","award":["G2022012010L"],"award-info":[{"award-number":["G2022012010L"]}]},{"name":"Key Research and Development Program Guidance Project of Heilongjiang","award":["HJK2024B002"],"award-info":[{"award-number":["HJK2024B002"]}]},{"name":"Key Research and Development Program Guidance Project of Heilongjiang","award":["GZ20220123"],"award-info":[{"award-number":["GZ20220123"]}]},{"name":"Key Research and Development Program Guidance Project of Heilongjiang","award":["24K15019"],"award-info":[{"award-number":["24K15019"]}]},{"name":"Japan Society for the Promotion of Science (JSPS) Grant-in-Aid Scientific Research(C)","award":["JD2023SJ19"],"award-info":[{"award-number":["JD2023SJ19"]}]},{"name":"Japan Society for the Promotion of Science (JSPS) Grant-in-Aid Scientific Research(C)","award":["LH2023F034"],"award-info":[{"award-number":["LH2023F034"]}]},{"name":"Japan Society for the Promotion of Science (JSPS) Grant-in-Aid Scientific Research(C)","award":["G2022012010L"],"award-info":[{"award-number":["G2022012010L"]}]},{"name":"Japan Society for the Promotion of Science (JSPS) Grant-in-Aid Scientific Research(C)","award":["HJK2024B002"],"award-info":[{"award-number":["HJK2024B002"]}]},{"name":"Japan Society for the Promotion of Science (JSPS) Grant-in-Aid Scientific Research(C)","award":["GZ20220123"],"award-info":[{"award-number":["GZ20220123"]}]},{"name":"Japan Society for the Promotion of Science (JSPS) Grant-in-Aid Scientific Research(C)","award":["24K15019"],"award-info":[{"award-number":["24K15019"]}]},{"name":"Chubu University Grant","award":["JD2023SJ19"],"award-info":[{"award-number":["JD2023SJ19"]}]},{"name":"Chubu University 
Grant","award":["LH2023F034"],"award-info":[{"award-number":["LH2023F034"]}]},{"name":"Chubu University Grant","award":["G2022012010L"],"award-info":[{"award-number":["G2022012010L"]}]},{"name":"Chubu University Grant","award":["HJK2024B002"],"award-info":[{"award-number":["HJK2024B002"]}]},{"name":"Chubu University Grant","award":["GZ20220123"],"award-info":[{"award-number":["GZ20220123"]}]},{"name":"Chubu University Grant","award":["24K15019"],"award-info":[{"award-number":["24K15019"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Remote Sensing"],"abstract":"<jats:p>Although the collaborative use of hyperspectral images (HSIs) and LiDAR data in land cover classification tasks has demonstrated significant importance and potential, several challenges remain. Notably, the heterogeneity in cross-modal information integration presents a major obstacle. Furthermore, most existing research relies heavily on category names, neglecting the rich contextual information from language descriptions. Visual-language pretraining (VLP) has achieved notable success in image recognition within natural domains by using multimodal information to enhance training efficiency and effectiveness. VLP has also shown great potential for land cover classification in remote sensing. This paper introduces a dual-sensor multimodal semantic collaborative classification network (DSMSC2N). It uses large language models (LLMs) in an instruction-driven manner to generate land cover category descriptions enriched with domain-specific knowledge in remote sensing. This approach aims to guide the model to accurately focus on and extract key features. Simultaneously, we integrate and optimize the complementary relationship between HSI and LiDAR data, enhancing the separability of land cover categories and improving classification accuracy. 
We conduct comprehensive experiments on benchmark datasets like Houston 2013, Trento, and MUUFL Gulfport, validating DSMSC2N\u2019s effectiveness compared to various baseline methods.<\/jats:p>","DOI":"10.3390\/rs16163082","type":"journal-article","created":{"date-parts":[[2024,8,22]],"date-time":"2024-08-22T04:26:57Z","timestamp":1724300817000},"page":"3082","update-policy":"https:\/\/doi.org\/10.3390\/mdpi_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Multimodal Semantic Collaborative Classification for Hyperspectral Images and LiDAR Data"],"prefix":"10.3390","volume":"16","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9118-230X","authenticated-orcid":false,"given":"Aili","family":"Wang","sequence":"first","affiliation":[{"name":"Heilongjiang Province Key Laboratory of Laser Spectroscopy Technology and Application, Harbin University of Science and Technology, Harbin 150080, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6368-9365","authenticated-orcid":false,"given":"Shiyu","family":"Dai","sequence":"additional","affiliation":[{"name":"Heilongjiang Province Key Laboratory of Laser Spectroscopy Technology and Application, Harbin University of Science and Technology, Harbin 150080, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2453-3691","authenticated-orcid":false,"given":"Haibin","family":"Wu","sequence":"additional","affiliation":[{"name":"Heilongjiang Province Key Laboratory of Laser Spectroscopy Technology and Application, Harbin University of Science and Technology, Harbin 150080, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1016-1636","authenticated-orcid":false,"given":"Yuji","family":"Iwahori","sequence":"additional","affiliation":[{"name":"Computer Science, Chubu University, Kasugai 487-8501, Japan"}]}],"member":"1968","published-online":{"date-parts":[[2024,8,21]]},"reference":[{"key":"ref_1","doi-asserted-by":"crossref","unstructured":"Sishodia, R.P., Ray, R.L., and Singh, S.K. (2020). 
Applications of remote sensing in precision agriculture: A review. Remote Sens., 12.","DOI":"10.3390\/rs12193136"},{"key":"ref_2","first-page":"1","article-title":"Special Section Guest Editorial: Satellite Hyperspectral Remote Sensing: Algorithms and Applications","volume":"42601","author":"Tan","year":"2021","journal-title":"J. Appl. Remote Sens."},{"key":"ref_3","doi-asserted-by":"crossref","first-page":"968","DOI":"10.1109\/JSTARS.2021.3133021","article-title":"Hyperspectral image classification\u2014Traditional to deep models: A survey for future prospects","volume":"15","author":"Ahmad","year":"2021","journal-title":"IEEE J. Sel. Top. Appl. Earth Obs. Remote Sens."},{"key":"ref_4","doi-asserted-by":"crossref","first-page":"6690","DOI":"10.1109\/TGRS.2019.2907932","article-title":"Deep learning for hyperspectral image classification: An overview","volume":"57","author":"Li","year":"2019","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"ref_5","doi-asserted-by":"crossref","first-page":"6124","DOI":"10.1109\/TGRS.2020.2974134","article-title":"Content-guided convolutional neural network for hyperspectral image classification","volume":"58","author":"Liu","year":"2020","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"ref_6","first-page":"8887","article-title":"Hybrid 3DNet: Hyperspectral Image Classification with Spectral-spatial Dimension Reduction using 3D CNN","volume":"975","author":"Zakaria","year":"2022","journal-title":"Int. J. Comput. Appl."},{"key":"ref_7","doi-asserted-by":"crossref","first-page":"5920","DOI":"10.1109\/TGRS.2020.3018449","article-title":"Fast sequential feature extraction for recurrent neural network-based hyperspectral image classification","volume":"59","author":"Ma","year":"2020","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"ref_8","doi-asserted-by":"crossref","unstructured":"Yang, X., Cao, W., Lu, Y., and Zhou, Y. (2022). Hyperspectral image transformer classification networks. IEEE Trans. Geosci. 
Remote Sens., 60.","DOI":"10.1109\/TGRS.2022.3171551"},{"key":"ref_9","doi-asserted-by":"crossref","first-page":"1579","DOI":"10.1109\/TGRS.2017.2765364","article-title":"Recent advances on spectral\u2013spatial hyperspectral image classification: An overview and new guidelines","volume":"56","author":"He","year":"2017","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"ref_10","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Lan, C., Zhang, H., Ma, G., and Li, H. (2024). Multimodal remote sensing image matching via learning features and attention mechanism. IEEE Trans. Geosci. Remote Sens., 62.","DOI":"10.1109\/TGRS.2023.3348980"},{"key":"ref_11","doi-asserted-by":"crossref","unstructured":"Ma, X., Zhang, X., Pun, M.-O., and Liu, M. (2024). A multilevel multimodal fusion transformer for remote sensing semantic segmentation. IEEE Trans. Geosci. Remote Sens., 62.","DOI":"10.1109\/TGRS.2024.3373033"},{"key":"ref_12","doi-asserted-by":"crossref","unstructured":"Wang, Q., Chen, W., Huang, Z., Tang, H., and Yang, L. (2024). MultiSenseSeg: A cost-effective unified multimodal semantic segmentation model for remote sensing. IEEE Trans. Geosci. Remote Sens., 62.","DOI":"10.1109\/TGRS.2024.3390750"},{"key":"ref_13","doi-asserted-by":"crossref","unstructured":"Li, J., Hong, D., Gao, L., Yao, J., Zheng, K., Zhang, B., and Chanussot, J. (2022). Deep learning in multimodal remote sensing data fusion: A comprehensive review. Int. J. Appl. Earth Obs. Geoinf., 112.","DOI":"10.1016\/j.jag.2022.102926"},{"key":"ref_14","doi-asserted-by":"crossref","first-page":"1560","DOI":"10.1109\/JPROC.2015.2449668","article-title":"Multimodal classification of remote sensing images: A review and future directions","volume":"103","author":"Tuia","year":"2015","journal-title":"Proc. 
IEEE"},{"key":"ref_15","doi-asserted-by":"crossref","first-page":"66","DOI":"10.1016\/j.inffus.2023.03.005","article-title":"A multimodal hyper-fusion transformer for remote sensing image classification","volume":"96","author":"Ma","year":"2023","journal-title":"Inf. Fusion"},{"key":"ref_16","doi-asserted-by":"crossref","first-page":"6","DOI":"10.1109\/MGRS.2018.2890023","article-title":"Multisource and multitemporal data fusion in remote sensing: A comprehensive review of the state of the art","volume":"7","author":"Ghamisi","year":"2019","journal-title":"IEEE Geosci. Remote Sens. Mag."},{"key":"ref_17","doi-asserted-by":"crossref","unstructured":"Dong, P., and Chen, Q. (2017). LiDAR Remote Sensing and Applications, CRC Press.","DOI":"10.4324\/9781351233354"},{"key":"ref_18","doi-asserted-by":"crossref","first-page":"1416","DOI":"10.1109\/TGRS.2008.916480","article-title":"Fusion of hyperspectral and LIDAR remote sensing data for classification of complex forest areas","volume":"46","author":"Dalponte","year":"2008","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"ref_19","doi-asserted-by":"crossref","first-page":"189","DOI":"10.1080\/19479832.2015.1055833","article-title":"Land-cover classification using both hyperspectral and LiDAR data","volume":"6","author":"Ghamisi","year":"2015","journal-title":"Int. J. Image Data Fusion"},{"key":"ref_20","doi-asserted-by":"crossref","first-page":"6804","DOI":"10.1109\/TCSVT.2023.3268757","article-title":"Joint contextual representation model-informed interpretable network with dictionary aligning for hyperspectral and LiDAR classification","volume":"33","author":"Dong","year":"2023","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"ref_21","doi-asserted-by":"crossref","unstructured":"Liu, Z., Mao, H., Wu, C.-Y., Feichtenhofer, C., Darrell, T., and Xie, S. (2022, January 18\u201324). A convnet for the 2020s. 
Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, New Orleans, LA, USA.","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"ref_22","doi-asserted-by":"crossref","unstructured":"Woo, S., Debnath, S., Hu, R., Chen, X., Liu, Z., Kweon, I.S., and Xie, S. (2023, January 17\u201324). Convnext v2: Co-designing and scaling convnets with masked autoencoders. Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Vancouver, BC, Canada.","DOI":"10.1109\/CVPR52729.2023.01548"},{"key":"ref_23","unstructured":"Yu, W., Zhou, P., Yan, S., and Wang, X. (2023). Inceptionnext: When inception meets convnext. arXiv."},{"key":"ref_24","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., and Gelly, S. (2020). An image is worth 16x16 words: Transformers for image recognition at scale. arXiv."},{"key":"ref_25","unstructured":"Ding, M., Xiao, B., Codella, N., Luo, P., Wang, J., and Yuan, L. Davit: Dual attention vision transformers. Proceedings of the European Conference on Computer Vision."},{"key":"ref_26","unstructured":"Patro, B.N., Namboodiri, V.P., and Agneeswaran, V.S. (2023). SpectFormer: Frequency and Attention is what you need in a Vision Transformer. arXiv."},{"key":"ref_27","doi-asserted-by":"crossref","unstructured":"Zhang, M., Li, W., Tao, R., Li, H., and Du, Q. (2021). Information fusion for classification of hyperspectral and LiDAR data using IP-CNN. IEEE Trans. Geosci. Remote Sens., 60.","DOI":"10.1109\/TGRS.2021.3093334"},{"key":"ref_28","doi-asserted-by":"crossref","unstructured":"Zhao, G., Ye, Q., Sun, L., Wu, Z., Pan, C., and Jeon, B. (2022). Joint classification of hyperspectral and LiDAR data using a hierarchical CNN and transformer. IEEE Trans. Geosci. 
Remote Sens., 61.","DOI":"10.1109\/TGRS.2022.3232498"},{"key":"ref_29","doi-asserted-by":"crossref","first-page":"3095","DOI":"10.1109\/TIP.2022.3162964","article-title":"Deep hierarchical vision transformer for hyperspectral and LiDAR data classification","volume":"31","author":"Xue","year":"2022","journal-title":"IEEE Trans. Image Process."},{"key":"ref_30","doi-asserted-by":"crossref","first-page":"937","DOI":"10.1109\/TGRS.2017.2756851","article-title":"Multisource remote sensing data classification based on convolutional neural network","volume":"56","author":"Xu","year":"2017","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"ref_31","first-page":"5500205","article-title":"Deep encoder\u2013decoder networks for classification of hyperspectral and LiDAR data","volume":"19","author":"Hong","year":"2020","journal-title":"IEEE Geosci. Remote Sens. Lett."},{"key":"ref_32","doi-asserted-by":"crossref","unstructured":"Li, J., Liu, Y., Song, R., Liu, W., Li, Y., and Du, Q. (2024). HyperMLP: Superpixel Prior and Feature Aggregated Perceptron Networks for Hyperspectral and Lidar Hybrid Classification. IEEE Trans. Geosci. Remote Sens., 62.","DOI":"10.1109\/TGRS.2024.3355037"},{"key":"ref_33","first-page":"5501505","article-title":"MS2CANet: Multi-scale Spatial-Spectral Cross-modal Attention Network for Hyperspectral image and LiDAR Classification","volume":"21","author":"Wang","year":"2024","journal-title":"IEEE Geosci. Remote Sens. Lett."},{"key":"ref_34","doi-asserted-by":"crossref","unstructured":"Song, T., Zeng, Z., Gao, C., Chen, H., and Li, J. (2024). Joint Classification of Hyperspectral and LiDAR Data Using Height Information Guided Hierarchical Fusion-and-Separation Network. IEEE Trans. Geosci. 
Remote Sens., 62.","DOI":"10.1109\/TGRS.2024.3353775"},{"key":"ref_35","doi-asserted-by":"crossref","first-page":"38","DOI":"10.1007\/s11633-022-1369-5","article-title":"Vlp: A survey on vision-language pre-training","volume":"20","author":"Chen","year":"2023","journal-title":"Mach. Intell. Res."},{"key":"ref_36","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., and Clark, J. (2021, January 18\u201324). Learning transferable visual models from natural language supervision. Proceedings of the International Conference on Machine Learning, Virtual."},{"key":"ref_37","unstructured":"Li, J., Li, D., Xiong, C., and Hoi, S. (2022, January 17\u201323). Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. Proceedings of the International Conference on Machine Learning, Baltimore, MD, USA."},{"key":"ref_38","unstructured":"Li, J., Li, D., Savarese, S., and Hoi, S. (2023, January 23\u201329). Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. Proceedings of the International Conference on Machine Learning, Honolulu, HI, USA."},{"key":"ref_39","unstructured":"Dai, W., Li, J., Li, D., Tiong, A.M.H., Zhao, J., Wang, W., Li, B., Fung, P.N., and Hoi, S. (2024, January 10\u201316). Instructblip: Towards general-purpose vision-language models with instruction tuning. Proceedings of the 37th International Conference on Neural Information Processing Systems, New Orleans, LA, USA."},{"key":"ref_40","doi-asserted-by":"crossref","unstructured":"Liu, F., Chen, D., Guan, Z., Zhou, X., Zhu, J., Ye, Q., Fu, L., and Zhou, J. (2024). Remoteclip: A vision language foundation model for remote sensing. IEEE Trans. Geosci. Remote Sens., 62.","DOI":"10.1109\/TGRS.2024.3390838"},{"key":"ref_41","unstructured":"Jia, C., Yang, Y., Xia, Y., Chen, Y.-T., Parekh, Z., Pham, H., Le, Q., Sung, Y.-H., Li, Z., and Duerig, T. 
(2021, January 18\u201324). Scaling up visual and vision-language representation learning with noisy text supervision. Proceedings of the 38th International Conference on Machine Learning, ICML 2021, Online."},{"key":"ref_42","unstructured":"Devlin, J., Chang, M.-W., Lee, K., and Toutanova, K. (2018). Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv."},{"key":"ref_43","doi-asserted-by":"crossref","first-page":"681","DOI":"10.1007\/s11023-020-09548-1","article-title":"GPT-3: Its nature, scope, limits, and consequences","volume":"30","author":"Floridi","year":"2020","journal-title":"Minds Mach."},{"key":"ref_44","unstructured":"Zhao, W.X., Zhou, K., Li, J., Tang, T., Wang, X., Hou, Y., Min, Y., Zhang, B., Zhang, J., and Dong, Z. (2023). A survey of large language models. arXiv."},{"key":"ref_45","doi-asserted-by":"crossref","unstructured":"Roumeliotis, K.I., and Tselikas, N.D. (2023). Chatgpt and open-ai models: A preliminary review. Future Internet, 15.","DOI":"10.3390\/fi15060192"},{"key":"ref_46","unstructured":"Yue, X., Qu, X., Zhang, G., Fu, Y., Huang, W., Sun, H., Su, Y., and Chen, W. (2023). Mammoth: Building math generalist models through hybrid instruction tuning. arXiv."},{"key":"ref_47","unstructured":"Zhou, H., Gu, B., Zou, X., Li, Y., Chen, S.S., Zhou, P., Liu, J., Hua, Y., Mao, C., and Wu, X. (2023). A survey of large language models in medicine: Progress, application, and challenge. arXiv."},{"key":"ref_48","unstructured":"Bolton, E., Venigalla, A., Yasunaga, M., Hall, D., Xiong, B., Lee, T., Daneshjou, R., Frankle, J., Liang, P., and Carbin, M. (2024). Biomedlm: A 2.7 b parameter language model trained on biomedical text. arXiv."},{"key":"ref_49","unstructured":"Li, J., Liu, W., Ding, Z., Fan, W., Li, Y., and Li, Q. (2024). Large Language Models are in-Context Molecule Learners. 
arXiv."},{"key":"ref_50","unstructured":"Shi, B., Zhao, P., Wang, Z., Zhang, Y., Wang, Y., Li, J., Dai, W., Zou, J., Xiong, H., and Tian, Q. (2024). UMG-CLIP: A Unified Multi-Granularity Vision Generalist for Open-World Understanding. arXiv."},{"key":"ref_51","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., and Chen, M. (2022). Hierarchical text-conditional image generation with clip latents. arXiv."},{"key":"ref_52","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1016\/j.neucom.2022.07.028","article-title":"Clip4clip: An empirical study of clip for end to end video clip retrieval and captioning","volume":"508","author":"Luo","year":"2022","journal-title":"Neurocomputing"},{"key":"ref_53","doi-asserted-by":"crossref","unstructured":"Rao, Y., Zhao, W., Chen, G., Tang, Y., Zhu, Z., Huang, G., Zhou, J., and Lu, J. (2022, January 18\u201324). Denseclip: Language-guided dense prediction with context-aware prompting. Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, New Orleans, LA, USA.","DOI":"10.1109\/CVPR52688.2022.01755"},{"key":"ref_54","first-page":"13988","article-title":"Clip-it! language-guided video summarization","volume":"34","author":"Narasimhan","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref_55","doi-asserted-by":"crossref","unstructured":"Zhang, R., Guo, Z., Zhang, W., Li, K., Miao, X., Cui, B., Qiao, Y., Gao, P., and Li, H. (2022, January 18\u201324). Pointclip: Point cloud understanding by clip. Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, New Orleans, LA, USA.","DOI":"10.1109\/CVPR52688.2022.00836"},{"key":"ref_56","doi-asserted-by":"crossref","unstructured":"Elizalde, B., Deshmukh, S., Al Ismail, M., and Wang, H. (2023, January 4\u201310). Clap learning audio concepts from natural language supervision. 
Proceedings of the ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Rhodes Island, Greece.","DOI":"10.1109\/ICASSP49357.2023.10095889"},{"key":"ref_57","unstructured":"Xue, L., Yu, N., Zhang, S., Panagopoulou, A., Li, J., Mart\u00edn-Mart\u00edn, R., Wu, J., Xiong, C., Xu, R., and Niebles, J.C. (2023). Ulip-2: Towards scalable multimodal pre-training for 3d understanding. arXiv."},{"key":"ref_58","unstructured":"Arora, S., Khandeparkar, H., Khodak, M., Plevrakis, O., and Saunshi, N. (2019). A theoretical analysis of contrastive unsupervised representation learning. arXiv."},{"key":"ref_59","unstructured":"Wang, T., and Isola, P. (2020, January 13\u201318). Understanding contrastive representation learning through alignment and uniformity on the hypersphere. Proceedings of the 37th International Conference on Machine Learning, ICML 2020, Online."},{"key":"ref_60","first-page":"5000","article-title":"Provable guarantees for self-supervised deep learning with spectral contrastive loss","volume":"34","author":"HaoChen","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref_61","unstructured":"Huang, W., Yi, M., Zhao, X., and Jiang, Z. (2021). Towards the generalization of contrastive self-supervised learning. arXiv."},{"key":"ref_62","doi-asserted-by":"crossref","first-page":"4340","DOI":"10.1109\/TGRS.2020.3016820","article-title":"More diverse means better: Multimodal deep learning meets remote-sensing imagery classification","volume":"59","author":"Hong","year":"2020","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"ref_63","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.inffus.2021.12.008","article-title":"Multi-attentive hierarchical dense fusion net for fusion classification of hyperspectral and LiDAR data","volume":"82","author":"Wang","year":"2022","journal-title":"Inf. 
Fusion"},{"key":"ref_64","doi-asserted-by":"crossref","unstructured":"Mohla, S., Pande, S., Banerjee, B., and Chaudhuri, S. (2020, January 13\u201319). Fusatnet: Dual attention based spectrospatial multimodal fusion network for hyperspectral and lidar classification. Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, Seattle, WA, USA.","DOI":"10.21203\/rs.3.rs-32802\/v1"},{"key":"ref_65","doi-asserted-by":"crossref","first-page":"118","DOI":"10.1016\/j.inffus.2022.12.020","article-title":"Coupled adversarial learning for fusion classification of hyperspectral and LiDAR data","volume":"93","author":"Lu","year":"2023","journal-title":"Inf. Fusion"},{"key":"ref_66","doi-asserted-by":"crossref","unstructured":"Yang, Y., Zhu, D., Qu, T., Wang, Q., Ren, F., and Cheng, C. (2022). Single-stream CNN with learnable architecture for multisource remote sensing data. IEEE Trans. Geosci. Remote Sens., 60.","DOI":"10.1109\/TGRS.2022.3169163"},{"key":"ref_67","first-page":"16344","article-title":"Flashattention: Fast and memory-efficient exact attention with io-awareness","volume":"35","author":"Dao","year":"2022","journal-title":"Adv. Neural Inf. Process. 
Syst."}],"container-title":["Remote Sensing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.mdpi.com\/2072-4292\/16\/16\/3082\/pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T15:40:39Z","timestamp":1760110839000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.mdpi.com\/2072-4292\/16\/16\/3082"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,21]]},"references-count":67,"journal-issue":{"issue":"16","published-online":{"date-parts":[[2024,8]]}},"alternative-id":["rs16163082"],"URL":"https:\/\/doi.org\/10.3390\/rs16163082","relation":{},"ISSN":["2072-4292"],"issn-type":[{"value":"2072-4292","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8,21]]}}}