{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T09:28:50Z","timestamp":1766050130728,"version":"3.37.3"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,9,20]],"date-time":"2022-09-20T00:00:00Z","timestamp":1663632000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,9,20]],"date-time":"2022-09-20T00:00:00Z","timestamp":1663632000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62066035","61962044"],"award-info":[{"award-number":["62066035","61962044"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2023,1]]},"DOI":"10.1007\/s00521-022-07789-y","type":"journal-article","created":{"date-parts":[[2022,9,20]],"date-time":"2022-09-20T16:03:13Z","timestamp":1663689793000},"page":"735-748","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Exploring vision transformer: classifying electron-microscopy pollen images with transformer"],"prefix":"10.1007","volume":"35","author":[{"given":"Kaibo","family":"Duan","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1107-5679","authenticated-orcid":false,"given":"Shi","family":"Bao","sequence":"additional","affiliation":[]},{"given":"Zhiqiang","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Shaodong","family":"Cui","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,9,20]]},"reference":[{"key":"7789_CR1","unstructured":"Ba JL, Kiros JR, Hinton GE (2016) Layer normalization. arXiv preprint arXiv:1607.06450"},{"key":"7789_CR2","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: European Conference on Computer Vision, Springer, pp 213\u2013229","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"7789_CR3","doi-asserted-by":"crossref","unstructured":"Chen H, Wang Y, Guo T, Xu C, Deng Y, Liu Z, Ma S, Xu C, Xu C, Gao W (2021) Pre-trained image processing transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 12299\u201312310","DOI":"10.1109\/CVPR46437.2021.01212"},{"key":"7789_CR4","doi-asserted-by":"crossref","unstructured":"Cubuk ED, Zoph B, Mane D, Vasudevan V, Le QV (2019) Autoaugment: learning augmentation strategies from data. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 113\u2013123","DOI":"10.1109\/CVPR.2019.00020"},{"key":"7789_CR5","doi-asserted-by":"crossref","unstructured":"Dai J, Qi H, Xiong Y, Li Y, Zhang G, Hu H, Wei Y (2017) Deformable convolutional networks. In: Proceedings of the IEEE international conference on computer vision, pp 764\u2013773","DOI":"10.1109\/ICCV.2017.89"},{"key":"7789_CR6","unstructured":"Devlin J, Chang MW, Lee K, Toutanova K (2018) Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805"},{"key":"7789_CR7","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S, et\u00a0al. (2020) An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929"},{"issue":"9","key":"7789_CR8","doi-asserted-by":"publisher","first-page":"976","DOI":"10.1111\/j.1398-9995.2007.01393.x","volume":"62","author":"G D\u2019Amato","year":"2007","unstructured":"D\u2019Amato G, Cecchi L, Bonini S, Nunes C, Annesi-Maesano I, Behrendt H, Liccardi G, Popov T, Van Cauwenberge P (2007) Allergenic pollen and pollen allergy in Europe. Allergy 62(9):976\u2013990","journal-title":"Allergy"},{"key":"7789_CR9","doi-asserted-by":"crossref","unstructured":"Gao F, Mu X, Ouyang C, Yang K, Ji S, Guo J, Wei H, Wang N, Ma L, Yang B (2022) Mltdnet: an efficient multi-level transformer network for single image deraining. Neural Comput Appl, pp 1\u201315","DOI":"10.1007\/s00521-022-07226-0"},{"key":"7789_CR10","doi-asserted-by":"crossref","unstructured":"Ghofrani A, Mahdian\u00a0Toroghi R (2022) Knowledge distillation in plant disease recognition. Neural Comput Appl, pp 1\u201310","DOI":"10.1007\/s00521-021-06882-y"},{"issue":"6","key":"7789_CR11","doi-asserted-by":"publisher","first-page":"e0157044","DOI":"10.1371\/journal.pone.0157044","volume":"11","author":"AB Goncalves","year":"2016","unstructured":"Goncalves AB, Souza JS, Silva GGd, Cereda MP, Pott A, Naka MH, Pistori H (2016) Feature extraction and machine learning for the classification of Brazilian savannah pollen grains. PLoS one 11(6):e0157044","journal-title":"PLoS one"},{"key":"7789_CR12","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"7789_CR13","unstructured":"Hendrycks D, Gimpel K (2016) Bridging nonlinearities and stochastic regularizers with gaussian error linear units"},{"key":"7789_CR14","doi-asserted-by":"crossref","unstructured":"Heo B, Yun S, Han D, Chun S, Choe J, Oh SJ (2021) Rethinking spatial dimensions of vision transformers. arXiv preprint arXiv:2103.16302","DOI":"10.1109\/ICCV48922.2021.01172"},{"key":"7789_CR15","unstructured":"Hinton G, Vinyals O, Dean J (2015) Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531"},{"key":"7789_CR16","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Van Der\u00a0Maaten L, Weinberger KQ (2017) Densely connected convolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4700\u20134708","DOI":"10.1109\/CVPR.2017.243"},{"key":"7789_CR17","unstructured":"Hughes D, Salath\u00e9 M, et\u00a0al. (2015) An open access repository of images on plant health to enable the development of mobile disease diagnostics. arXiv preprint arXiv:1511.08060"},{"key":"7789_CR18","first-page":"1097","volume":"25","author":"A Krizhevsky","year":"2012","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. Adv Neural Inf Process Syst 25:1097\u20131105","journal-title":"Adv Neural Inf Process Syst"},{"key":"7789_CR19","doi-asserted-by":"crossref","unstructured":"LeCun Y, Haffner P, Bottou L, Bengio Y (1999) Object recognition with gradient-based learning. In: Shape, contour and grouping in computer vision, Springer, pp 319\u2013345","DOI":"10.1007\/3-540-46805-6_19"},{"key":"7789_CR20","doi-asserted-by":"crossref","unstructured":"Ledig C, Theis L, Husz\u00e1r F, Caballero J, Cunningham A, Acosta A, Aitken A, Tejani A, Totz J, Wang Z, et\u00a0al. (2017) Photo-realistic single image super-resolution using a generative adversarial network. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4681\u20134690","DOI":"10.1109\/CVPR.2017.19"},{"key":"7789_CR21","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 10012\u201310022","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"7789_CR22","doi-asserted-by":"crossref","unstructured":"Long J, Shelhamer E, Darrell T (2015) Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3431\u20133440","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"7789_CR23","unstructured":"Loshchilov I, Hutter F (2017) Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101"},{"key":"7789_CR24","doi-asserted-by":"crossref","unstructured":"Mao X, Qi G, Chen Y, Li X, Duan R, Ye S, He Y, Xue H (2021) Towards robust vision transformer. arXiv preprint arXiv:2105.07926","DOI":"10.1109\/CVPR52688.2022.01173"},{"key":"7789_CR25","unstructured":"Radford A, Narasimhan K, Salimans T, Sutskever I (2018) Improving language understanding by generative pre-training"},{"key":"7789_CR26","doi-asserted-by":"crossref","unstructured":"Radosavovic I, Kosaraju RP, Girshick R, He K, Doll\u00e1r P (2020) Designing network design spaces. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 10428\u201310436","DOI":"10.1109\/CVPR42600.2020.01044"},{"key":"7789_CR27","first-page":"91","volume":"28","author":"S Ren","year":"2015","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster r-cnn: towards real-time object detection with region proposal networks. Adv Neural Inf Process Syst 28:91\u201399","journal-title":"Adv Neural Inf Process Syst"},{"key":"7789_CR28","doi-asserted-by":"publisher","first-page":"425","DOI":"10.1590\/0102-33062015abb0077","volume":"29","author":"AKMG Sarwar","year":"2015","unstructured":"Sarwar AKMG, Hoshino Y, Araki H (2015) Pollen morphology and its taxonomic significance in the genus bomarea mirb. (alstroemeriaceae)-i. subgenera baccata, sphaerine, and wichuraea. Acta bot bras 29:425\u2013432","journal-title":"Acta bot bras"},{"key":"7789_CR29","doi-asserted-by":"crossref","unstructured":"Selvaraju RR, Cogswell M, Das A, Vedantam R, Parikh D, Batra D (2017) Grad-cam: Visual explanations from deep networks via gradient-based localization. In: Proceedings of the IEEE international conference on computer vision, pp 618\u2013626","DOI":"10.1109\/ICCV.2017.74"},{"key":"7789_CR30","doi-asserted-by":"crossref","unstructured":"Shaw P, Uszkoreit J, Vaswani A (2018) Self-attention with relative position representations. arXiv preprint arXiv:1803.02155","DOI":"10.18653\/v1\/N18-2074"},{"key":"7789_CR31","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556"},{"issue":"6701","key":"7789_CR32","doi-asserted-by":"publisher","first-page":"449","DOI":"10.1038\/26646","volume":"395","author":"R Szibor","year":"1998","unstructured":"Szibor R, Schubert C, Sch\u00f6ning R, Krause D, Wendt U (1998) Pollen analysis reveals murder season. Nature 395(6701):449\u2013450","journal-title":"Nature"},{"key":"7789_CR33","unstructured":"Tan M, Le Q (2019) Efficientnet: rethinking model scaling for convolutional neural networks. In: International Conference on Machine Learning, PMLR, pp 6105\u20136114"},{"key":"7789_CR34","doi-asserted-by":"crossref","unstructured":"Tang Y, Wang B, He W, Qian F (2022) Pointdet++: an object detection framework based on human local features with transformer encoder. Neural Comput Appl, pp 1\u201312","DOI":"10.1007\/s00521-022-06938-7"},{"key":"7789_CR35","unstructured":"Touvron H, Cord M, Douze M, Massa F, Sablayrolles A, J\u00e9gou H (2021) Training data-efficient image transformers & distillation through attention. In: International Conference on Machine Learning, PMLR, pp 10347\u201310357"},{"key":"7789_CR36","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. In: Advances in neural information processing systems, pp 5998\u20136008"},{"key":"7789_CR37","doi-asserted-by":"crossref","unstructured":"Wang A, Singh A, Michael J, Hill F, Levy O, Bowman SR (2018a) Glue: a multi-task benchmark and analysis platform for natural language understanding. arXiv preprint arXiv:1804.07461","DOI":"10.18653\/v1\/W18-5446"},{"key":"7789_CR38","doi-asserted-by":"crossref","unstructured":"Wang X, Girshick R, Gupta A, He K (2018b) Non-local neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7794\u20137803","DOI":"10.1109\/CVPR.2018.00813"},{"issue":"12","key":"7789_CR39","doi-asserted-by":"publisher","first-page":"10051","DOI":"10.1007\/s00521-022-06990-3","volume":"34","author":"Y Wang","year":"2022","unstructured":"Wang Y, Xie Y, Fan L, Hu G (2022) Stmg: Swin transformer for multi-label image recognition with graph convolution network. Neural Comput Appl 34(12):10051\u201310063","journal-title":"Neural Comput Appl"},{"key":"7789_CR40","doi-asserted-by":"crossref","unstructured":"Wu H, Xiao B, Codella N, Liu M, Dai X, Yuan L, Zhang L (2021) Cvt: introducing convolutions to vision transformers. arXiv preprint arXiv:2103.15808","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"7789_CR41","unstructured":"Xiao H, Rasul K, Vollgraf R (2017) Fashion-mnist: a novel image dataset for benchmarking machine learning algorithms. arXiv preprint arXiv:1708.07747"},{"key":"7789_CR42","doi-asserted-by":"crossref","unstructured":"Xie S, Girshick R, Doll\u00e1r P, Tu Z, He K (2017) Aggregated residual transformations for deep neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1492\u20131500","DOI":"10.1109\/CVPR.2017.634"},{"key":"7789_CR43","doi-asserted-by":"crossref","unstructured":"Yuan L, Chen Y, Wang T, Yu W, Shi Y, Jiang Z, Tay FE, Feng J, Yan S (2021) Tokens-to-token vit: Training vision transformers from scratch on imagenet. arXiv preprint arXiv:2101.11986","DOI":"10.1109\/ICCV48922.2021.00060"},{"key":"7789_CR44","doi-asserted-by":"crossref","unstructured":"Zagoruyko S, Komodakis N (2016) Wide residual networks. arXiv preprint arXiv:1605.07146","DOI":"10.5244\/C.30.87"},{"key":"7789_CR45","doi-asserted-by":"crossref","unstructured":"Zeiler MD, Fergus R (2014) Visualizing and understanding convolutional networks. In: European conference on computer vision, Springer, pp 818\u2013833","DOI":"10.1007\/978-3-319-10590-1_53"},{"key":"7789_CR46","unstructured":"Zhou D, Kang B, Jin X, Yang L, Lian X, Jiang Z, Hou Q, Feng J (2021) Deepvit: towards deeper vision transformer. arXiv preprint arXiv:2103.11886"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-022-07789-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-022-07789-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-022-07789-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,7]],"date-time":"2023-01-07T06:14:16Z","timestamp":1673072056000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-022-07789-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9,20]]},"references-count":46,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,1]]}},"alternative-id":["7789"],"URL":"https:\/\/doi.org\/10.1007\/s00521-022-07789-y","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2022,9,20]]},"assertion":[{"value":"25 January 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 September 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 September 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}