{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T18:31:23Z","timestamp":1773772283162,"version":"3.50.1"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"21","license":[{"start":{"date-parts":[[2023,7,31]],"date-time":"2023-07-31T00:00:00Z","timestamp":1690761600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,7,31]],"date-time":"2023-07-31T00:00:00Z","timestamp":1690761600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["41971365"],"award-info":[{"award-number":["41971365"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["41571401"],"award-info":[{"award-number":["41571401"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62102200"],"award-info":[{"award-number":["62102200"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100017700","name":"Henan Provincial Science and Technology Research Project","doi-asserted-by":"publisher","award":["2121022104922,232102211058,232102110299"],"award-info":[{"award-number":["2121022104922,232102211058,232102110299"]}],"id":[{"id":"10.13039\/501100017700","id-type":"DOI","asserted-by":"publisher"}]},{"name":"the Key Research Projects of Henan Higher Education Institutions","award":["23A520053,23B520030"],"award-info":[{"award-number":["23A520053,23B520030"]}]},{"name":"the Interdisciplinary Sciences Project of Nanyang Institute of Technology","award":["NGJC-2022-01"],"award-info":[{"award-number":["NGJC-2022-01"]}]},{"name":"the Doctoral Research Start-up Fund Project at Nanyang Institute of Technology"},{"name":"the General Project of Humanities and Social Sciences Research in Henan Province","award":["2022-ZZJh-081"],"award-info":[{"award-number":["2022-ZZJh-081"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,11]]},"DOI":"10.1007\/s10489-023-04725-y","type":"journal-article","created":{"date-parts":[[2023,7,31]],"date-time":"2023-07-31T13:02:19Z","timestamp":1690808539000},"page":"24947-24962","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["HELViT: highly efficient lightweight vision transformer for remote sensing image scene classification"],"prefix":"10.1007","volume":"53","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3927-7616","authenticated-orcid":false,"given":"Dongen","family":"Guo","sequence":"first","affiliation":[]},{"given":"Zechen","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Jiangfan","family":"Feng","sequence":"additional","affiliation":[]},{"given":"Zhuoke","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Zhen","family":"Shen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,7,31]]},"reference":[{"issue":"5","key":"4725_CR1","doi-asserted-by":"publisher","first-page":"494","DOI":"10.3390\/rs11050494","volume":"11","author":"Wei Zhang","year":"2019","unstructured":"Zhang Wei, Tang Ping, Zhao Lijun (2019) Remote sensing image scene classification using CNN-CapsNet. Remote Sensing 11(5):494","journal-title":"Remote Sensing"},{"issue":"5","key":"4725_CR2","doi-asserted-by":"publisher","first-page":"2811","DOI":"10.1109\/TGRS.2017.2783902","volume":"56","author":"Gong Cheng","year":"2018","unstructured":"Cheng Gong, Yang Ceyuan, Yao Xiwen, Guo Lei, Han Junwei (2018) When deep learning meets metric learning: Remote sensing image scene classification via learning discriminative CNNs. IEEE transactions on geoscience and remote sensing 56(5):2811\u20132821","journal-title":"IEEE transactions on geoscience and remote sensing"},{"issue":"3","key":"4725_CR3","doi-asserted-by":"publisher","first-page":"2520","DOI":"10.1109\/TGRS.2020.3001401","volume":"59","author":"J Wang","year":"2020","unstructured":"Wang J, Zhong Y, Zheng Z, Ma A, Zhang L (2020) Rsnet: The search for remote sensing deep neural networks in recognition tasks. IEEE Transactions on Geoscience and Remote Sensing 59(3):2520\u20132534","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"4725_CR4","doi-asserted-by":"publisher","first-page":"16740","DOI":"10.1109\/ACCESS.2022.3147543","volume":"10","author":"D Yu","year":"2022","unstructured":"Yu D, Xu Q, Guo H, Lu J, Lin Y, Liu X (2022) Aggregating features from dual paths for remote sensing image scene classification. IEEE Access 10:16740\u201316755","journal-title":"IEEE Access"},{"key":"4725_CR5","first-page":"1","volume":"60","author":"W Zhang","year":"2022","unstructured":"Zhang W, Jiao L, Liu F, Liu J, Cui Z (2022) Lhnet: Laplacian convolutional block for remote sensing image scene classification. IEEE Transactions on Geoscience and Remote Sensing 60:1\u201313","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"issue":"11","key":"4725_CR6","doi-asserted-by":"publisher","first-page":"1894","DOI":"10.1109\/LGRS.2019.2960026","volume":"17","author":"K Xu","year":"2020","unstructured":"Xu K, Huang H, Li Y, Shi G (2020) Multilayer feature fusion network for scene classification in remote sensing. IEEE Geoscience and Remote Sensing Letters 17(11):1894\u20131898","journal-title":"IEEE Geoscience and Remote Sensing Letters"},{"key":"4725_CR7","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, e.a. Sylvain: An image is worth 16x16 words: Transformers for image recognition at scale. In: International Conference on Learning Representations (2023)"},{"key":"4725_CR8","first-page":"1","volume":"60","author":"P Lv","year":"2022","unstructured":"Lv P, Wu W, Zhong Y, Du F, Zhang L (2022) Scvit: A spatial-channel feature preserving vision transformer for remote sensing image scene classification. IEEE Transactions on Geoscience and Remote Sensing 60:1\u201312","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"4725_CR9","first-page":"1","volume":"19","author":"Y Yu","year":"2022","unstructured":"Yu Y, Li Y, Wang J, Guan H, Li F, Xiao S, Tang E, Ding X (2022) C$$^2$$-capsvit: Cross-context and cross-scale capsule vision transformers for remote sensing image scene classification. IEEE Geoscience and Remote Sensing Letters 19:1\u20135","journal-title":"IEEE Geoscience and Remote Sensing Letters"},{"key":"4725_CR10","unstructured":"Li, J., Xia, X., Li, W., Li, H., Wang, X., Xiao, X., Wang, R., Zheng, M., Pan, X.: Next-vit: Next generation vision transformer for efficient deployment in realistic industrial scenarios. In: European Conference on Computer Vision (2022)"},{"key":"4725_CR11","unstructured":"Bolya, D., Fu, C.-Y., Dai, X., Zhang, P., Feichtenhofer, C., Hoffman, J.: Token merging: Your ViT but faster. In: International Conference on Learning Representations (2023)"},{"key":"4725_CR12","unstructured":"Wu, C., Wu, F., Qi, T., Huang, Y., Xie, X.: Fastformer: Additive attention can be all you need. In: European Conference on Computer Vision (2021)"},{"key":"4725_CR13","unstructured":"Li, K., Wang, Y., Gao, P., Song, G., Liu, Y., Li, H., Qiao, Y.: Uniformer: Unified transformer for efficient spatiotemporal representation learning. In: International Conference on Learning Representations (2022)"},{"key":"4725_CR14","unstructured":"Yang, C., Qiao, S., Yu, Q., Yuan, X., Zhu, Y., Yuille, A.L., Adam, H., Chen, L.-C.: Moat: Alternating mobile convolution and attention brings strong vision models. In: International Conference on Learning Representations (2023)"},{"key":"4725_CR15","doi-asserted-by":"crossref","unstructured":"Chen, Y., Dai, X., Chen, D., Liu, M., Dong, X., Yuan, L., Liu, Z.: Mobile-former: Bridging mobilenet and transformer. (2021)","DOI":"10.1109\/CVPR52688.2022.00520"},{"key":"4725_CR16","unstructured":"Li, Y., Yuan, G., Wen, Y., Hu, E., Evangelidis, G., Tulyakov, S., Wang, Y., Ren, J.: Efficientformer: Vision transformers at mobilenet speed. In: Conference on Neural Information Processing Systems (2022)"},{"key":"4725_CR17","doi-asserted-by":"crossref","unstructured":"Bolya, Daniel and Fu, Cheng-Yang and Dai, Xiaoliang and Zhang, Peizhao and Hoffman, Judy: Hydra attention: Efficient attention with many heads. In: Computer Vision\u2013ECCV 2022 Workshops: Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part VII, pp. 35\u201349 (2023). Springer","DOI":"10.1007\/978-3-031-25082-8_3"},{"key":"4725_CR18","doi-asserted-by":"crossref","unstructured":"Wang, W., Xie, E., Li, X., Fan, D.-P., Song, K., Liang, D., Lu, T., Luo, P., Shao, L.: Pvtv2: Improved baselines with pyramid vision transformer. Computational Visual Media (2021)","DOI":"10.1007\/s41095-022-0274-8"},{"key":"4725_CR19","unstructured":"Liu, J., Pan, Z., He, H., Cai, J., Zhuang, B.: Ecoformer: Energy-saving attention with linear complexity. In: NeurIPS (2022)"},{"key":"4725_CR20","unstructured":"Kitaev, N., Kaiser, L., Levskaya, A.: Reformer: The efficient transformer. In: International Conference on Learning Representations (2020)"},{"key":"4725_CR21","doi-asserted-by":"crossref","unstructured":"Kong, Z., Dong, P., Ma, X., Meng, X., Niu, W., Sun, M., Ren, B., Qin, M., Tang, H., Wang, Y.: Spvit: Enabling faster vision transformers via soft token pruning. In: European Conference on Computer Vision (2022)","DOI":"10.1007\/978-3-031-20083-0_37"},{"key":"4725_CR22","unstructured":"Liang, Y., Ge, C., Tong, Z., Song, Y., Wang, J., Xie, P.: Not all patches are what you need: Expediting vision transformers via token reorganizations. In: International Conference on Learning Representations (2022)"},{"key":"4725_CR23","doi-asserted-by":"crossref","unstructured":"Marin, D., Chang, J.-H.R., Ranjan, A., Prabhu, A., Rastegari, M., Tuzel, O.: Token pooling in vision transformers for image classification. In: 2023 IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 12\u201321 (2023)","DOI":"10.1109\/WACV56688.2023.00010"},{"key":"4725_CR24","doi-asserted-by":"crossref","unstructured":"Yang, Y., Newsam, S.: Bag-of-visual-words and spatial extensions for land-use classification. advances in geographic information systems, 270\u2013279 (2010)","DOI":"10.1145\/1869790.1869829"},{"issue":"7","key":"4725_CR25","doi-asserted-by":"publisher","first-page":"3965","DOI":"10.1109\/TGRS.2017.2685945","volume":"55","author":"G-S Xia","year":"2017","unstructured":"Xia G-S, Hu J, Hu F, Shi B, Bai X, Zhong Y, Zhang L, Lu X (2017) Aid: A benchmark data set for performance evaluation of aerial scene classification. IEEE Transactions on Geoscience and Remote Sensing 55(7):3965\u20133981","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"issue":"10","key":"4725_CR26","doi-asserted-by":"publisher","first-page":"1865","DOI":"10.1109\/JPROC.2017.2675998","volume":"105","author":"G Cheng","year":"2017","unstructured":"Cheng G, Han J, Lu X (2017) Remote sensing image scene classification: Benchmark and state of the art. Proceedings of the IEEE 105(10):1865\u20131883","journal-title":"Proceedings of the IEEE"},{"key":"4725_CR27","doi-asserted-by":"crossref","unstructured":"Bi Q, Qin K, Li Z, Zhang H, Xu K, Xia G-S (2020) A multiple-instance densely-connected convnet for aerial scene classification. IEEE Transactions on Image Processing 29:4911\u20134926","DOI":"10.1109\/TIP.2020.2975718"},{"issue":"9","key":"4725_CR28","doi-asserted-by":"publisher","first-page":"1603","DOI":"10.1109\/LGRS.2019.2949930","volume":"17","author":"Q Bi","year":"2020","unstructured":"Bi Q, Qin K, Zhang H, Xie J, Li Z, Xu K (2020) Apdc-net: Attention pooling-based convolutional network for aerial scene classification. IEEE Geoscience and Remote Sensing Letters 17(9):1603\u20131607","journal-title":"IEEE Geoscience and Remote Sensing Letters"},{"key":"4725_CR29","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1016\/j.neucom.2019.11.068","volume":"377","author":"Q Bi","year":"2020","unstructured":"Bi Q, Qin K, Zhang H, Li Z, Xu K (2020) Radc-net: A residual attention based convolution network for aerial scene classification. Neurocomputing 377:345\u2013359","journal-title":"Neurocomputing"},{"key":"4725_CR30","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1016\/j.neucom.2021.01.038","volume":"436","author":"Q Bi","year":"2021","unstructured":"Bi Q, Zhang H, Qin K (2021) Multi-scale stacking attention pooling for remote sensing scene classification. Neurocomputing 436:147\u2013161","journal-title":"Neurocomputing"},{"issue":"9","key":"4725_CR31","doi-asserted-by":"publisher","first-page":"7918","DOI":"10.1109\/TGRS.2020.3044655","volume":"59","author":"X Wang","year":"2021","unstructured":"Wang X, Wang S, Ning C, Zhou H (2021) Enhanced feature pyramid network with deep semantic embedding for remote sensing scene classification. IEEE Transactions on Geoscience and Remote Sensing 59(9):7918\u20137932","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"4725_CR32","doi-asserted-by":"publisher","first-page":"6498","DOI":"10.1109\/TIP.2021.3092816","volume":"30","author":"Q Bi","year":"2021","unstructured":"Bi Q, Qin K, Zhang H, Xia G-S (2021) Local semantic enhanced convnet for aerial scene recognition. IEEE Transactions on Image Processing 30:6498\u20136511","journal-title":"IEEE Transactions on Image Processing"},{"issue":"3","key":"4725_CR33","doi-asserted-by":"publisher","first-page":"516","DOI":"10.3390\/rs13030516","volume":"13","author":"Y Bazi","year":"2021","unstructured":"Bazi Y, Bashmal L, Rahhal MMA, Dayil RA, Ajlan NA (2021) Vision transformers for remote sensing image classification. Remote Sensing 13(3):516","journal-title":"Remote Sensing"},{"key":"4725_CR34","doi-asserted-by":"crossref","unstructured":"Deng, P., Xu, K., Huang, .H.: When cnns meet vision transformer: A joint framework for remote sensing scene classification. IEEE Geoscience and Remote Sensing Letters 19, 1\u20135 (2021)","DOI":"10.1109\/LGRS.2021.3109061"},{"issue":"20","key":"4725_CR35","doi-asserted-by":"publisher","first-page":"4143","DOI":"10.3390\/rs13204143","volume":"13","author":"J Zhang","year":"2021","unstructured":"Zhang J, Zhao H, Li J (2021) TRS: Transformers for remote sensing scene classification. Remote Sensing 13(20):4143","journal-title":"Remote Sensing"},{"key":"4725_CR36","doi-asserted-by":"crossref","unstructured":"Zhang Y, Zheng X, Lu X (2021) Pairwise comparison network for remote-sensing scene classification. IEEE Geoscience and Remote Sensing Letters 19:1\u20135","DOI":"10.1109\/LGRS.2021.3139695"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-023-04725-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-023-04725-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-023-04725-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,23]],"date-time":"2023-10-23T14:12:52Z","timestamp":1698070372000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-023-04725-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,31]]},"references-count":36,"journal-issue":{"issue":"21","published-print":{"date-parts":[[2023,11]]}},"alternative-id":["4725"],"URL":"https:\/\/doi.org\/10.1007\/s10489-023-04725-y","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,7,31]]},"assertion":[{"value":"22 May 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 July 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"The authors have no relevant financial or non-financial interests to disclose. The authors have no competing interests to declare that are relevant to the content of this article.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}