{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T06:32:34Z","timestamp":1778135554744,"version":"3.51.4"},"reference-count":89,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100014718","name":"Innovative Research Group Project of the National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62302278"],"award-info":[{"award-number":["62302278"]}],"id":[{"id":"10.13039\/100014718","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100014718","name":"Innovative Research Group Project of the National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["52374221"],"award-info":[{"award-number":["52374221"]}],"id":[{"id":"10.13039\/100014718","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100014103","name":"Key Technology Research and Development Program of Shandong Province","doi-asserted-by":"publisher","award":["2022ZD0119501"],"award-info":[{"award-number":["2022ZD0119501"]}],"id":[{"id":"10.13039\/100014103","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100010029","name":"Taishan Scholar Foundation of Shandong Province","doi-asserted-by":"publisher","award":["ts20190936"],"award-info":[{"award-number":["ts20190936"]}],"id":[{"id":"10.13039\/501100010029","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100007129","name":"Natural Science Foundation of Shandong 
Province","doi-asserted-by":"publisher","award":["ZR2023QF014"],"award-info":[{"award-number":["ZR2023QF014"]}],"id":[{"id":"10.13039\/501100007129","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100014761","name":"Natural Science Foundation of Qingdao Municipality","doi-asserted-by":"publisher","award":["23-2-1-112-zyyd-jch"],"award-info":[{"award-number":["23-2-1-112-zyyd-jch"]}],"id":[{"id":"10.13039\/501100014761","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s10489-024-05910-3","type":"journal-article","created":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T10:52:14Z","timestamp":1733136734000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["CRViT: Vision transformer advanced by causality and inductive bias for image recognition"],"prefix":"10.1007","volume":"55","author":[{"given":"Faming","family":"Lu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kunhao","family":"Jia","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xue","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lin","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,2]]},"reference":[{"key":"5910_CR1","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I (2017) Attention is all you need. In: Proceedings of the 31st international conference on neural information processing systems. NIPS\u201917. 
Curran Associates Inc., Red Hook, NY, USA, pp 6000\u20136010"},{"key":"5910_CR2","unstructured":"Devlin J, Chang M, Lee K, Toutanova K (2019) BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019, Minneapolis, MN, USA, June 2-7, 2019, Volume 1 (Long and Short Papers), pp 4171\u20134186"},{"key":"5910_CR3","doi-asserted-by":"crossref","unstructured":"Wang A, Singh A, Michael J, Hill F, Levy O, Bowman SR (2018) GLUE: a multi-task benchmark and analysis platform for natural language understanding, 1\u201320. arXiv preprint arXiv:1804.07461","DOI":"10.18653\/v1\/W18-5446"},{"key":"5910_CR4","doi-asserted-by":"crossref","unstructured":"Ojo OE, Ta HT, Gelbukh A, Calvo H, Adebanji OO, Sidorov G (2023) Transformer-based approaches to sentiment detection. In: Recent developments and the new directions of research, foundations, and applications: selected papers of the 8th world conference on soft computing, February 03\u201305, 2022, Baku, Azerbaijan, Vol. II, pp 101\u2013110","DOI":"10.1007\/978-3-031-23476-7_10"},{"key":"5910_CR5","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Houlsby N (2020) An image is worth 16x16 words: transformers for image recognition at scale, 1\u201322. arXiv preprint arXiv:2010.11929"},{"key":"5910_CR6","doi-asserted-by":"crossref","unstructured":"Sun C, Shrivastava A, Singh S, Gupta A (2017) Revisiting unreasonable effectiveness of data in deep learning era. In: Proceedings of the IEEE international conference on computer vision, pp 843\u2013852","DOI":"10.1109\/ICCV.2017.97"},{"key":"5910_CR7","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. 
In: 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp 1\u20139","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"5910_CR8","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. In: Advances in neural information processing systems, vol 25, pp 1\u20139"},{"key":"5910_CR9","unstructured":"Simonyan K (2014) Very deep convolutional networks for large-scale image recognition, 1\u201314. arXiv preprint arXiv:1409.1556"},{"key":"5910_CR10","unstructured":"Tan M (2019) Efficientnet: rethinking model scaling for convolutional neural networks, 1\u201311. arXiv preprint arXiv:1905.11946"},{"key":"5910_CR11","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"5910_CR12","doi-asserted-by":"crossref","unstructured":"Long Y, Wen Y, Han J, Xu H, Ren P, Zhang W, Zhao S, Liang X (2023) Capdet: unifying dense captioning and open-world detection pretraining. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 15233\u201315243","DOI":"10.1109\/CVPR52729.2023.01462"},{"key":"5910_CR13","doi-asserted-by":"crossref","unstructured":"Lei J, Hu X, Wang Y, Liu D (2023) Pyramidflow: high-resolution defect contrastive localization using pyramid normalizing flow. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 14143\u201314152","DOI":"10.1109\/CVPR52729.2023.01359"},{"key":"5910_CR14","doi-asserted-by":"crossref","unstructured":"Chen F, Zhang H, Hu K, Huang Y-K, Zhu C, Savvides M (2023) Enhanced training of query-based object detection via selective query recollection. 
In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 23756\u201323765","DOI":"10.1109\/CVPR52729.2023.02275"},{"key":"5910_CR15","doi-asserted-by":"crossref","unstructured":"Yun S, Han D, Oh SJ, Chun S, Choe J, Yoo Y (2019) Cutmix: regularization strategy to train strong classifiers with localizable features. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 6023\u20136032","DOI":"10.1109\/ICCV.2019.00612"},{"key":"5910_CR16","unstructured":"Touvron H, Cord M, Douze M, Massa F, Sablayrolles A, J\u00e9gou H (2021) Training data-efficient image transformers & distillation through attention. In: International conference on machine learning, pp 10347\u201310357"},{"key":"5910_CR17","doi-asserted-by":"crossref","unstructured":"Heo B, Yun S, Han D, Chun S, Choe J, Oh SJ (2021) Rethinking spatial dimensions of vision transformers. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 11936\u201311945","DOI":"10.1109\/ICCV48922.2021.01172"},{"key":"5910_CR18","unstructured":"Russakovsky O, Deng J, Su H, Krause J, Satheesh S, Ma S, Huang Z, Karpathy A, Khosla A, Bernstein M (2014) Imagenet large scale visual recognition challenge. Int J Comput Vis, 1\u201342"},{"key":"5910_CR19","doi-asserted-by":"crossref","unstructured":"Pan X, Ye T, Xia Z, Song S, Huang G (2023) Slide-transformer: hierarchical vision transformer with local self-attention. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2082\u20132091","DOI":"10.1109\/CVPR52729.2023.00207"},{"issue":"2","key":"5910_CR20","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1007\/s11633-022-1374-8","volume":"20","author":"X-Y Lin","year":"2023","unstructured":"Lin X-Y, Xu Y-Y, Wang W-J, Zhang Y, Feng F-L (2023) Mitigating spurious correlations for self-supervised recommendation. 
Mach Intell Res 20(2):263\u2013275","journal-title":"Mach Intell Res"},{"key":"5910_CR21","first-page":"1","volume":"20","author":"A Rahimi","year":"2007","unstructured":"Rahimi A, Recht B (2007) Random features for large-scale kernel machines. Adv Neural Inf Process Syst 20:1\u20138","journal-title":"Adv Neural Inf Process Syst"},{"key":"5910_CR22","first-page":"1","volume-title":"Causal inference in statistics: a primer","author":"J Pearl","year":"2016","unstructured":"Pearl J, Glymour M, Jewell NP (2016) Causal inference in statistics: a primer. John Wiley & Sons, Hoboken, pp 1\u201390"},{"key":"5910_CR23","doi-asserted-by":"crossref","unstructured":"Wang L, Boddeti VN (2022) Do learned representations respect causal relationships? In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 264\u2013274","DOI":"10.1109\/CVPR52688.2022.00036"},{"key":"5910_CR24","doi-asserted-by":"crossref","unstructured":"Hu X, Tang K, Miao C, Hua X-S, Zhang H (2021) Distilling causal effect of data in class-incremental learning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3957\u20133966","DOI":"10.1109\/CVPR46437.2021.00395"},{"issue":"5","key":"5910_CR25","doi-asserted-by":"publisher","first-page":"612","DOI":"10.1109\/JPROC.2021.3058954","volume":"109","author":"B Sch\u00f6lkopf","year":"2021","unstructured":"Sch\u00f6lkopf B, Locatello F, Bauer S, Ke NR, Kalchbrenner N, Goyal A, Bengio Y (2021) Toward causal representation learning. Proc IEEE 109(5):612\u2013634","journal-title":"Proc IEEE"},{"issue":"6","key":"5910_CR26","doi-asserted-by":"publisher","first-page":"485","DOI":"10.1007\/s11633-022-1362-z","volume":"19","author":"Y Liu","year":"2022","unstructured":"Liu Y, Wei Y-S, Yan H, Li G-B, Lin L (2022) Causal reasoning meets visual representation learning: a prospective study. 
Mach Intell Res 19(6):485\u2013511","journal-title":"Mach Intell Res"},{"issue":"4","key":"5910_CR27","doi-asserted-by":"publisher","first-page":"1095","DOI":"10.1109\/TMI.2022.3224067","volume":"42","author":"C Ouyang","year":"2022","unstructured":"Ouyang C, Chen C, Li S, Li Z, Qin C, Bai W, Rueckert D (2022) Causality-inspired single-source domain generalization for medical image segmentation. IEEE Trans Med Imaging 42(4):1095\u20131106","journal-title":"IEEE Trans Med Imaging"},{"key":"5910_CR28","doi-asserted-by":"crossref","unstructured":"Chen Z, Tian Z, Zhu J, Li C, Du S (2022) C-cam: causal cam for weakly supervised semantic segmentation on medical image. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 11676\u201311685","DOI":"10.1109\/CVPR52688.2022.01138"},{"key":"5910_CR29","doi-asserted-by":"crossref","unstructured":"Miao J, Chen C, Liu F, Wei H, Heng P-A (2023) Caussl: causality-inspired semi-supervised learning for medical image segmentation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 21426\u201321437","DOI":"10.1109\/ICCV51070.2023.01959"},{"issue":"4","key":"5910_CR30","doi-asserted-by":"publisher","first-page":"1095","DOI":"10.1109\/TMI.2022.3224067","volume":"42","author":"C Ouyang","year":"2021","unstructured":"Ouyang C, Chen C, Li S, Li Z, Qin C, Bai W, Rueckert D (2021) Causality-inspired single-source domain generalization for medical image segmentation. IEEE Trans Med Imaging 42(4):1095\u20131106","journal-title":"IEEE Trans Med Imaging"},{"key":"5910_CR31","doi-asserted-by":"crossref","unstructured":"Zhang Y, Huang Z-A, Hong Z, Wu S, Wu J, Tan K (2024) Mixed prototype correction for causal inference in medical image classification. 
In: ACM Multimedia 2024, pp 1\u201310","DOI":"10.1145\/3664647.3681395"},{"key":"5910_CR32","doi-asserted-by":"crossref","unstructured":"Yang Z, Lin M, Zhong X, Wu Y, Wang Z (2023) Good is bad: causality inspired cloth-debiasing for cloth-changing person re-identification. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1472\u20131481","DOI":"10.1109\/CVPR52729.2023.00148"},{"key":"5910_CR33","doi-asserted-by":"crossref","unstructured":"Rao Y, Chen G, Lu J, Zhou J (2021) Counterfactual attention learning for fine-grained visual categorization and re-identification. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 1025\u20131034","DOI":"10.1109\/ICCV48922.2021.00106"},{"key":"5910_CR34","doi-asserted-by":"crossref","unstructured":"Niu Y, Tang K, Zhang H, Lu Z, Hua X-S, Wen J-R (2021) Counterfactual vqa: a cause-effect look at language bias. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12700\u201312710","DOI":"10.1109\/CVPR46437.2021.01251"},{"key":"5910_CR35","doi-asserted-by":"crossref","unstructured":"Yang X, Zhang H, Qi G, Cai J (2021) Causal attention for vision-language tasks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 9847\u20139857","DOI":"10.1109\/CVPR46437.2021.00972"},{"issue":"5","key":"5910_CR36","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3409382","volume":"53","author":"K Yu","year":"2020","unstructured":"Yu K, Guo X, Liu L, Li J, Wang H, Ling Z, Wu X (2020) Causality-based feature selection: methods and evaluations. ACM Comput Surv (CSUR) 53(5):1\u201336","journal-title":"ACM Comput Surv (CSUR)"},{"issue":"4","key":"5910_CR37","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3436891","volume":"15","author":"K Yu","year":"2021","unstructured":"Yu K, Liu L, Li J (2021) A unified view of causal and non-causal feature selection. 
ACM Trans Knowl Discov Data (TKDD) 15(4):1\u201346","journal-title":"ACM Trans Knowl Discov Data (TKDD)"},{"issue":"4","key":"5910_CR38","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3488055","volume":"16","author":"K Yu","year":"2022","unstructured":"Yu K, Yang Y, Ding W (2022) Causal feature selection with missing data. ACM Trans Knowl Discov Data (TKDD) 16(4):1\u201324","journal-title":"ACM Trans Knowl Discov Data (TKDD)"},{"key":"5910_CR39","unstructured":"Li X, Zhang Z, Wei G, Lan C, Zeng W, Jin X, Chen Z (2021) Confounder identification-free causal visual feature learning, 1\u201321. arXiv preprint arXiv:2111.13420"},{"issue":"20","key":"5910_CR40","doi-asserted-by":"publisher","first-page":"6347","DOI":"10.1002\/cpe.6347","volume":"33","author":"L Li","year":"2021","unstructured":"Li L, Lin Y, Zhao H, Chen J, Li S (2021) Causality-based online streaming feature selection. Concurr Comput Pract Exp 33(20):6347","journal-title":"Concurr Comput Pract Exp"},{"key":"5910_CR41","doi-asserted-by":"publisher","first-page":"3513","DOI":"10.1109\/TIP.2021.3062192","volume":"30","author":"P Wu","year":"2021","unstructured":"Wu P, Liu J (2021) Learning causal temporal relation and feature discrimination for anomaly detection. IEEE Trans Image Process 30:3513\u20133527","journal-title":"IEEE Trans Image Process"},{"key":"5910_CR42","doi-asserted-by":"crossref","unstructured":"Zhang X, Wong Y, Wu X, Lu J, Kankanhalli M, Li X, Geng W (2021) Learning causal representation for training cross-domain pose estimator via generative interventions. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 11270\u201311280","DOI":"10.1109\/ICCV48922.2021.01108"},{"key":"5910_CR43","doi-asserted-by":"crossref","unstructured":"Wang T, Zhou C, Sun Q, Zhang H (2021) Causal attention for unbiased visual recognition. 
In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 3091\u20133100","DOI":"10.1109\/ICCV48922.2021.00308"},{"key":"5910_CR44","doi-asserted-by":"crossref","unstructured":"Yue Z, Wang T, Sun Q, Hua X-S, Zhang H (2021) Counterfactual zero-shot and open-set visual recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 15404\u201315414","DOI":"10.1109\/CVPR46437.2021.01515"},{"key":"5910_CR45","doi-asserted-by":"crossref","unstructured":"Li W, Li Z (2022) Causal-setr: a segmentation transformer variant based on causal intervention. In: Proceedings of the Asian conference on computer vision, pp 756\u2013772","DOI":"10.1007\/978-3-031-26293-7_25"},{"key":"5910_CR46","first-page":"655","volume":"33","author":"D Zhang","year":"2020","unstructured":"Zhang D, Zhang H, Tang J, Hua X-S, Sun Q (2020) Causal intervention for weakly-supervised semantic segmentation. Adv Neural Inf Process Syst 33:655\u2013666","journal-title":"Adv Neural Inf Process Syst"},{"key":"5910_CR47","doi-asserted-by":"crossref","unstructured":"Wang T, Huang J, Zhang H, Sun Q (2020) Visual commonsense r-cnn. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10760\u201310770","DOI":"10.1109\/CVPR42600.2020.01077"},{"key":"5910_CR48","unstructured":"Goudet O, Kalainathan D, Caillou P, Guyon I, Lopez-Paz D, Sebag M (2017) Causal generative neural networks, 1\u20137. arXiv preprint arXiv:1711.08936"},{"key":"5910_CR49","unstructured":"Wang D, Yang Y, Tao C, Gan Z, Chen L, Kong F, Henao R, Carin L (2020) Proactive pseudo-intervention: causally informed contrastive learning for interpretable vision models, 1\u201319. arXiv preprint arXiv:2012.03369"},{"key":"5910_CR50","doi-asserted-by":"crossref","unstructured":"Lin G, Xu Y, Lai H, Yin J (2024) Revisiting few-shot learning from a causal perspective. 
IEEE Trans Knowl Data Eng, 1\u201313","DOI":"10.1109\/TKDE.2024.3397689"},{"issue":"11","key":"5910_CR51","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y Lecun","year":"1998","unstructured":"Lecun Y, Bottou L (1998) Gradient-based learning applied to document recognition. Proc IEEE 86(11):2278\u20132324","journal-title":"Proc IEEE"},{"key":"5910_CR52","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 10012\u201310022","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"5910_CR53","doi-asserted-by":"crossref","unstructured":"Wang W, Xie E, Li X, Fan D-P, Song K, Liang D, Lu T, Luo P, Shao L (2021) Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 568\u2013578","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"5910_CR54","doi-asserted-by":"crossref","unstructured":"Strudel R, Garcia R, Laptev I, Schmid C (2021) Segmenter: transformer for semantic segmentation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 7262\u20137272","DOI":"10.1109\/ICCV48922.2021.00717"},{"key":"5910_CR55","unstructured":"Chen J, Lu Y, Yu Q, Luo X, Adeli E, Wang Y, Lu L, Yuille AL, Zhou Y (2021) Transunet: transformers make strong encoders for medical image segmentation abs\/2102.04306:1\u201313"},{"key":"5910_CR56","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. 
In: European conference on computer vision, pp 213\u2013229","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"5910_CR57","doi-asserted-by":"crossref","unstructured":"Graham B, El-Nouby A, Touvron H, Stock P, Joulin A, J\u00e9gou H, Douze M (2021) Levit: a vision transformer in convnet\u2019s clothing for faster inference. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 12259\u201312269","DOI":"10.1109\/ICCV48922.2021.01204"},{"key":"5910_CR58","first-page":"3965","volume":"34","author":"Z Dai","year":"2021","unstructured":"Dai Z, Liu H, Le QV, Tan M (2021) Coatnet: marrying convolution and attention for all data sizes. Adv Neural Inf Process Syst 34:3965\u20133977","journal-title":"Adv Neural Inf Process Syst"},{"key":"5910_CR59","doi-asserted-by":"crossref","unstructured":"Peng Z, Huang W, Gu S, Xie L, Wang Y, Jiao J, Ye Q (2021) Conformer: local features coupling global representations for visual recognition. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 367\u2013376","DOI":"10.1109\/ICCV48922.2021.00042"},{"key":"5910_CR60","doi-asserted-by":"crossref","unstructured":"Chen Y, Dai X, Chen D, Liu M, Dong X, Yuan L, Liu Z (2022) Mobile-former: bridging mobilenet and transformer. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5270\u20135279","DOI":"10.1109\/CVPR52688.2022.00520"},{"key":"5910_CR61","doi-asserted-by":"crossref","unstructured":"Yuan K, Guo S, Liu Z, Zhou A, Yu F, Wu W (2021) Incorporating convolution designs into visual transformers. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 579\u2013588","DOI":"10.1109\/ICCV48922.2021.00062"},{"key":"5910_CR62","unstructured":"Li Y, Zhang K, Cao J, Timofte R, Van\u00a0Gool L (2021) Localvit: Bringing locality to vision transformers, 1\u201310. 
arXiv preprint arXiv:2104.05707"},{"key":"5910_CR63","doi-asserted-by":"crossref","unstructured":"Srinivas A, Lin T-Y, Parmar N, Shlens J, Abbeel P, Vaswani A (2021) Bottleneck transformers for visual recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 16519\u201316529","DOI":"10.1109\/CVPR46437.2021.01625"},{"key":"5910_CR64","unstructured":"Mehta S, Rastegari M (2021) Mobilevit: light-weight, general-purpose, and mobile-friendly vision transformer, 1\u201326. arXiv preprint arXiv:2110.02178"},{"key":"5910_CR65","doi-asserted-by":"crossref","unstructured":"Guo J, Han K, Wu H, Tang Y, Chen X, Wang Y, Xu C (2022) Cmt: convolutional neural networks meet vision transformers. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12175\u201312185","DOI":"10.1109\/CVPR52688.2022.01186"},{"key":"5910_CR66","unstructured":"Yan H, Li Z, Li W, Wang C, Wu M, Zhang C (2021) Contnet: why not use convolution and transformer at the same time?, 1\u201312. arXiv preprint arXiv:2104.13497"},{"key":"5910_CR67","unstructured":"Huang Z, Ben Y, Luo G, Cheng P, Yu G, Fu B (2021) Shuffle transformer: rethinking spatial shuffle for vision transformer, 1\u201312. arXiv preprint arXiv:2106.03650"},{"key":"5910_CR68","doi-asserted-by":"crossref","unstructured":"Pan Z, Zhuang B, Liu J, He H, Cai J (2021) Scalable vision transformers with hierarchical pooling. In: Proceedings of the IEEE\/cvf international conference on computer vision, pp 377\u2013386","DOI":"10.1109\/ICCV48922.2021.00043"},{"key":"5910_CR69","doi-asserted-by":"crossref","unstructured":"Marr D (2010) Vision: a computational investigation into the human representation and processing of visual information. 
MIT press, Cambridge","DOI":"10.7551\/mitpress\/9780262514620.001.0001"},{"issue":"8","key":"5910_CR70","doi-asserted-by":"publisher","first-page":"880","DOI":"10.1038\/nn1278","volume":"7","author":"SL Brincat","year":"2004","unstructured":"Brincat SL, Connor CE (2004) Underlying principles of visual shape selectivity in posterior inferotemporal cortex. Nat Neurosci 7(8):880\u2013886","journal-title":"Nat Neurosci"},{"key":"5910_CR71","doi-asserted-by":"crossref","unstructured":"Essen DCv (1997) A tension-based theory of morphogenesis and compact wiring in the central nervous system. Nature 385(6614):313\u2013318","DOI":"10.1038\/385313a0"},{"issue":"3","key":"5910_CR72","doi-asserted-by":"publisher","first-page":"574","DOI":"10.1113\/jphysiol.1959.sp006308","volume":"148","author":"DH Hubel","year":"1959","unstructured":"Hubel DH, Wiesel TN et al (1959) Receptive fields of single neurones in the cat\u2019s striate cortex. J Physiol 148(3):574\u2013591","journal-title":"J Physiol"},{"issue":"1","key":"5910_CR73","first-page":"1","volume":"1","author":"DJ Felleman","year":"1991","unstructured":"Felleman DJ, Van Essen DC (1991) Distributed hierarchical processing in the primate cerebral cortex. Cerebral cortex (New York, NY: 1991) 1(1):1\u201347","journal-title":"Cerebral cortex (New York, NY: 1991)"},{"key":"5910_CR74","doi-asserted-by":"crossref","unstructured":"Zhang X, Cui P, Xu R, Zhou L, He Y, Shen Z (2021) Deep stable learning for out-of-distribution generalization. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5372\u20135382","DOI":"10.1109\/CVPR46437.2021.00533"},{"issue":"2","key":"5910_CR75","first-page":"1073","volume":"16","author":"R Yadav","year":"2024","unstructured":"Yadav R, Priyanka Kacker P (2024) Automedsys: automatic facial micro-expression detection system using random fourier features based neural network. 
Int J Inf Technol 16(2):1073\u20131086","journal-title":"Int J Inf Technol"},{"key":"5910_CR76","doi-asserted-by":"crossref","unstructured":"Russakovsky O, Deng J, Su H, Krause J, Satheesh S, Ma S, Huang Z, Karpathy A, Khosla A, Bernstein M et al (2015) Imagenet large scale visual recognition challenge. Int J Comput Vis 115:211\u2013252","DOI":"10.1007\/s11263-015-0816-y"},{"key":"5910_CR77","unstructured":"Krizhevsky A, Nair V, Hinton G (2009) Cifar-10 and cifar-100 datasets 6(1):1. https:\/\/www.cs.toronto.edu\/~kriz\/cifar.html"},{"key":"5910_CR78","unstructured":"Howard AG (2017) Mobilenets: efficient convolutional neural networks for mobile vision applications, 1\u20139. arXiv preprint arXiv:1704.04861"},{"key":"5910_CR79","doi-asserted-by":"crossref","unstructured":"Sandler M, Howard A, Zhu M, Zhmoginov A, Chen L-C (2018) Mobilenetv2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4510\u20134520","DOI":"10.1109\/CVPR.2018.00474"},{"key":"5910_CR80","doi-asserted-by":"crossref","unstructured":"Howard A, Sandler M, Chu G, Chen L-C, Chen B, Tan M, Wang W, Zhu Y, Pang R, Vasudevan V et al (2019) Searching for mobilenetv3. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 1314\u20131324","DOI":"10.1109\/ICCV.2019.00140"},{"key":"5910_CR81","doi-asserted-by":"crossref","unstructured":"Mehta S, Rastegari M, Shapiro L, Hajishirzi H (2019) Espnetv2: a light-weight, power efficient, and general purpose convolutional neural network. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 9190\u20139200","DOI":"10.1109\/CVPR.2019.00941"},{"key":"5910_CR82","doi-asserted-by":"crossref","unstructured":"Ma N, Zhang X, Zheng H-T, Sun J (2018) Shufflenet v2: practical guidelines for efficient cnn architecture design. 
In: Proceedings of the European Conference on Computer Vision (ECCV), pp 116\u2013131","DOI":"10.1007\/978-3-030-01264-9_8"},{"key":"5910_CR83","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Van Der\u00a0Maaten L, Weinberger KQ (2017) Densely connected convolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4700\u20134708","DOI":"10.1109\/CVPR.2017.243"},{"key":"5910_CR84","doi-asserted-by":"crossref","unstructured":"Woo S, Debnath S, Hu R, Chen X, Liu Z, Kweon IS, Xie S (2023) Convnext v2: co-designing and scaling convnets with masked autoencoders. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 16133\u201316142","DOI":"10.1109\/CVPR52729.2023.01548"},{"key":"5910_CR85","doi-asserted-by":"crossref","unstructured":"Zhang P, Dai X, Yang J, Xiao B, Yuan L, Zhang L, Gao J (2021) Multi-scale vision longformer: a new vision transformer for high-resolution image encoding. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2998\u20133008","DOI":"10.1109\/ICCV48922.2021.00299"},{"key":"5910_CR86","doi-asserted-by":"crossref","unstructured":"Wang W, Xie E, Li X, Fan D-P, Song K, Liang D, Lu T, Luo P, Shao L (2022) Pvt v2: improved baselines with pyramid vision transformer. Comput Vis Media 8(3):415\u2013424","DOI":"10.1007\/s41095-022-0274-8"},{"issue":"11","key":"5910_CR87","doi-asserted-by":"publisher","first-page":"12760","DOI":"10.1109\/TPAMI.2022.3202765","volume":"45","author":"Y-H Wu","year":"2022","unstructured":"Wu Y-H, Liu Y, Zhan X, Cheng M-M (2022) P2t: pyramid pooling transformer for scene understanding. IEEE Trans Pattern Anal Mach Intell 45(11):12760\u201312771","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"5910_CR88","unstructured":"Mehta S, Rastegari M (2022) Separable self-attention for mobile vision transformers, 1\u201318. 
arXiv preprint arXiv:2206.02680"},{"key":"5910_CR89","doi-asserted-by":"crossref","unstructured":"Yuan L, Chen Y, Wang T, Yu W, Shi Y, Jiang Z-H, Tay FE, Feng J, Yan S (2021) Tokens-to-token vit: training vision transformers from scratch on imagenet. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 558\u2013567","DOI":"10.1109\/ICCV48922.2021.00060"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-05910-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-024-05910-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-05910-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,2]],"date-time":"2025-01-02T15:16:19Z","timestamp":1735830979000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-024-05910-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,2]]},"references-count":89,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["5910"],"URL":"https:\/\/doi.org\/10.1007\/s10489-024-05910-3","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,2]]},"assertion":[{"value":"22 October 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 December 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article 
History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"This study is based on publicly available data for which ethical approval is not required. All data are de-identified and collected in a manner consistent with ethical standards for research.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical and informed consent for data used"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}}],"article-number":"68"}}