{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T13:27:30Z","timestamp":1772803650994,"version":"3.50.1"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2023,10,18]],"date-time":"2023-10-18T00:00:00Z","timestamp":1697587200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,10,18]],"date-time":"2023-10-18T00:00:00Z","timestamp":1697587200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2024,3]]},"DOI":"10.1007\/s11227-023-05701-6","type":"journal-article","created":{"date-parts":[[2023,10,18]],"date-time":"2023-10-18T15:02:54Z","timestamp":1697641374000},"page":"6221-6239","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Fine-grained bird image classification based on counterfactual method of vision transformer model"],"prefix":"10.1007","volume":"80","author":[{"given":"Tianhua","family":"Chen","sequence":"first","affiliation":[]},{"given":"Yanyue","family":"Li","sequence":"additional","affiliation":[]},{"given":"Qinghua","family":"Qiao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,10,18]]},"reference":[{"issue":"1","key":"5701_CR1","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1016\/j.tree.2015.11.005","volume":"31","author":"JB Socolar","year":"2016","unstructured":"Socolar JB, Gilroy JJ, Kunin WE, Edwards DP (2016) How should beta-diversity inform biodiversity conservation. Trends Ecol Evolut 31(1):67\u201380","journal-title":"Trends Ecol Evolut"},{"key":"5701_CR2","unstructured":"Wah C, Branson S, Welinder P, Perona P, Belongie S (2011) The caltech-ucsd birds-200-2011 dataset"},{"key":"5701_CR3","doi-asserted-by":"crossref","unstructured":"Van\u00a0Horn G, Branson S, Farrell R, Haber S, Barry J, Ipeirotis P, Perona P, Belongie S (2015) Building a bird recognition app and large scale dataset with citizen scientists: the fine print in fine-grained dataset collection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 595\u2013604","DOI":"10.1109\/CVPR.2015.7298658"},{"issue":"6","key":"5701_CR4","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2017) Imagenet classification with deep convolutional neural networks. Commun ACM 60(6):84\u201390","journal-title":"Commun ACM"},{"key":"5701_CR5","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556"},{"key":"5701_CR6","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"5701_CR7","doi-asserted-by":"crossref","unstructured":"Zhang N, Donahue J, Girshick R, Darrell T (2014) Part-based r-cnns for fine-grained category detection. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part I 13, pp 834\u2013849. Springer","DOI":"10.1007\/978-3-319-10590-1_54"},{"key":"5701_CR8","doi-asserted-by":"crossref","unstructured":"Huang S, Xu Z, Tao D, Zhang Y (2016) Part-stacked cnn for fine-grained visual categorization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 1173\u20131182","DOI":"10.1109\/CVPR.2016.132"},{"key":"5701_CR9","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R (2017) Mask r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, pp 2961\u20132969","DOI":"10.1109\/ICCV.2017.322"},{"key":"5701_CR10","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S (2020) An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929"},{"key":"5701_CR11","unstructured":"Tan M, Le Q (2019) Efficientnet: Rethinking model scaling for convolutional neural networks. In: International Conference on Machine Learning, pp 6105\u20136114. PMLR"},{"key":"5701_CR12","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Van Der\u00a0Maaten L, Weinberger KQ (2017) Densely connected convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 4700\u20134708","DOI":"10.1109\/CVPR.2017.243"},{"key":"5701_CR13","doi-asserted-by":"crossref","unstructured":"Branson S, Van\u00a0Horn G, Belongie S, Perona P (2014) Bird species categorization using pose normalized deep convolutional nets. arXiv preprint arXiv:1406.2952","DOI":"10.5244\/C.28.87"},{"key":"5701_CR14","unstructured":"Wei X-S, Xie C-W, Wu J (2016) Mask-cnn: Localizing parts and selecting descriptors for fine-grained image recognition. arXiv preprint arXiv:1605.06878"},{"key":"5701_CR15","doi-asserted-by":"crossref","unstructured":"Gao Y, Beijbom O, Zhang N, Darrell T (2016) Compact bilinear pooling. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 317\u2013326","DOI":"10.1109\/CVPR.2016.41"},{"key":"5701_CR16","doi-asserted-by":"crossref","unstructured":"Fukui A, Park DH, Yang D, Rohrbach A, Darrell T, Rohrbach M (2016) Multimodal compact bilinear pooling for visual question answering and visual grounding. arXiv preprint arXiv:1606.01847","DOI":"10.18653\/v1\/D16-1044"},{"key":"5701_CR17","doi-asserted-by":"crossref","unstructured":"Cui Y, Zhou F, Wang J, Liu X, Lin Y, Belongie S (2017) Kernel pooling for convolutional neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 2921\u20132930","DOI":"10.1109\/CVPR.2017.325"},{"key":"5701_CR18","doi-asserted-by":"crossref","unstructured":"Girshick R (2015) Fast r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, pp 1440\u20131448","DOI":"10.1109\/ICCV.2015.169"},{"key":"5701_CR19","doi-asserted-by":"crossref","unstructured":"Yang Z, Luo T, Wang D, Hu Z, Gao J, Wang L (2018) Learning to navigate for fine-grained classification. In: Proceedings of the European Conference on Computer Vision (ECCV), pp 420\u2013435","DOI":"10.1007\/978-3-030-01264-9_26"},{"key":"5701_CR20","doi-asserted-by":"crossref","unstructured":"Liu C, Xie H, Zha ZJ, Ma L, Zhang Y (2020) Filtration and distillation: enhancing region attention for fine-grained visual categorization. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol 34, No (7), pp 11555\u201311562","DOI":"10.1609\/aaai.v34i07.6822"},{"key":"5701_CR21","doi-asserted-by":"crossref","unstructured":"Ge W, Lin X, Yu Y (2019) Weakly supervised complementary parts models for fine-grained image classification from the bottom up. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 3034\u20133043","DOI":"10.1109\/CVPR.2019.00315"},{"key":"5701_CR22","doi-asserted-by":"crossref","unstructured":"Rao Y, Chen G, Lu J, Zhou J (2021) Counterfactual attention learning for fine-grained visual categorization and re-identification. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 1025\u20131034","DOI":"10.1109\/ICCV48922.2021.00106"},{"key":"5701_CR23","doi-asserted-by":"crossref","unstructured":"Zheng H, Fu J, Mei T, Luo J (2017) Learning multi-attention convolutional neural network for fine-grained image recognition. In: Proceedings of the IEEE International Conference on Computer Vision, pp 5209\u20135217","DOI":"10.1109\/ICCV.2017.557"},{"key":"5701_CR24","doi-asserted-by":"crossref","unstructured":"Sun M, Yuan Y, Zhou F, Ding E (2018) Multi-attention multi-class constraint for fine-grained image recognition. In: Proceedings of the European Conference on Computer Vision (ECCV), pp 805\u2013821","DOI":"10.1007\/978-3-030-01270-0_49"},{"key":"5701_CR25","doi-asserted-by":"crossref","unstructured":"Zhuang P, Wang Y, Qiao Y (2020) Learning attentive pairwise interaction for fine-grained classification. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol 34, pp 13130\u201313137","DOI":"10.1609\/aaai.v34i07.7016"},{"key":"5701_CR26","unstructured":"Hu T, Qi H, Huang Q, Lu Y (2019) See better before looking closer: weakly supervised data augmentation network for fine-grained visual classification. arXiv preprint arXiv:1901.09891"},{"key":"5701_CR27","doi-asserted-by":"crossref","unstructured":"Dai Z, Yang Z, Yang Y, Carbonell J, Le QV, Salakhutdinov R (2019) Transformer-xl: attentive language models beyond a fixed-length context. arXiv preprint arXiv:1901.02860","DOI":"10.18653\/v1\/P19-1285"},{"key":"5701_CR28","unstructured":"Devlin J, Chang M-W, Lee K, Toutanova K (2018) Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805"},{"key":"5701_CR29","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. Adv Neural Inf Process Syst 30"},{"key":"5701_CR30","doi-asserted-by":"crossref","unstructured":"Girdhar R, Carreira J, Doersch C, Zisserman A (2019) Video action transformer network. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 244\u2013253","DOI":"10.1109\/CVPR.2019.00033"},{"key":"5701_CR31","doi-asserted-by":"crossref","unstructured":"He J, Chen JN, Liu S, Kortylewski A, Yang C, Bai Y, Wang C (2022) Transfg: a transformer architecture for fine-grained recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol 36, pp 852\u2013860","DOI":"10.1609\/aaai.v36i1.19967"},{"key":"5701_CR32","doi-asserted-by":"crossref","unstructured":"Zhang Y, Cao J, Zhang L, Liu X, Wang Z, Ling F, Chen W (2022) A free lunch from vit: adaptive attention multi-scale fusion transformer for fine-grained visual recognition. In: ICASSP 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp 3234\u20133238. IEEE","DOI":"10.1109\/ICASSP43922.2022.9747591"},{"key":"5701_CR33","doi-asserted-by":"crossref","unstructured":"Misra I, Girdhar R, Joulin A (2021) An end-to-end transformer model for 3d object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 2906\u20132917","DOI":"10.1109\/ICCV48922.2021.00290"},{"key":"5701_CR34","unstructured":"Zhu X, Su W, Lu L, Li B, Wang X, Dai J (2020) Deformable detr: deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159"},{"key":"5701_CR35","doi-asserted-by":"crossref","unstructured":"Zheng S, Lu J, Zhao H, Zhu X, Luo Z, Wang Y, Fu Y, Feng J, Xiang T, Torr PH, et al. (2021) Rethinking semantic segmentation from a sequence-to-sequence perspective with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 6881\u20136890","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"5701_CR36","unstructured":"Chen J, Lu Y, Yu Q, Luo X, Adeli E, Wang Y, Lu L, Yuille AL, Zhou Y (2021) Transunet: transformers make strong encoders for medical image segmentation. arXiv preprint arXiv:2102.04306"},{"key":"5701_CR37","unstructured":"Wang J, Yu X, Gao Y (2021) Feature fusion vision transformer for fine-grained visual categorization. arXiv preprint arXiv:2107.02341"},{"key":"5701_CR38","doi-asserted-by":"crossref","unstructured":"Hu Y, Jin X, Zhang Y, Hong H, Zhang J, He Y, Xue H (2021) Rams-trans: recurrent attention multi-scale transformer for fine-grained image recognition. In: Proceedings of the 29th ACM International Conference on Multimedia, pp 4239\u20134248","DOI":"10.1145\/3474085.3475561"},{"key":"5701_CR39","unstructured":"Zhang Z-C, Chen Z-D, Wang Y, Luo X, Xu X-S (2022) Vit-fod: a vision transformer based fine-grained object discriminator. arXiv preprint arXiv:2203.12816"},{"key":"5701_CR40","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Doll\u00e1r P, Girshick R, He K, Hariharan B, Belongie S (2017) Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 2117\u20132125","DOI":"10.1109\/CVPR.2017.106"},{"key":"5701_CR41","doi-asserted-by":"crossref","unstructured":"Korsch D, Bodesheim P, Denzler J (2021) End-to-end learning of fisher vector encodings for part features in fine-grained recognition. In: DAGM German Conference on Pattern Recognition, pp 142\u2013158. Springer","DOI":"10.1007\/978-3-030-92659-5_9"},{"key":"5701_CR42","unstructured":"Touvron H, Cord M, Douze M, Massa F, Sablayrolles A, J\u00e9gou H (2021) Training data-efficient image transformers & distillation through attention. In: International Conference on Machine Learning, pp 10347\u201310357. PMLR"},{"key":"5701_CR43","doi-asserted-by":"crossref","unstructured":"Abnar S, Zuidema W (2020) Quantifying attention flow in transformers. arXiv preprint arXiv:2005.00928","DOI":"10.18653\/v1\/2020.acl-main.385"},{"key":"5701_CR44","doi-asserted-by":"crossref","unstructured":"Serrano S, Smith NA (2019) Is attention interpretable? arXiv preprint arXiv:1906.03731","DOI":"10.18653\/v1\/P19-1282"},{"key":"5701_CR45","doi-asserted-by":"crossref","unstructured":"Liu Q, Kusner M, Blunsom P (2021) Counterfactual data augmentation for neural machine translation. In: Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp 187\u2013197","DOI":"10.18653\/v1\/2021.naacl-main.18"},{"key":"5701_CR46","doi-asserted-by":"crossref","unstructured":"Luo W, Yang X, Mo X, Lu Y, Davis LS, Li J, Yang J, Lim S-N (2019) Cross-x learning for fine-grained visual categorization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 8242\u20138251","DOI":"10.1109\/ICCV.2019.00833"},{"key":"5701_CR47","unstructured":"Touvron H, Vedaldi A, Douze M, J\u00e9gou H (2019) Fixing the train-test resolution discrepancy, vol 32"},{"key":"5701_CR48","doi-asserted-by":"crossref","unstructured":"Cui Y, Song Y, Sun C, Howard A, Belongie S (2018) Large scale fine-grained categorization and domain-specific transfer learning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 4109\u20134118","DOI":"10.1109\/CVPR.2018.00432"},{"key":"5701_CR49","doi-asserted-by":"crossref","unstructured":"Zhuang P, Wang Y, Qiao Y (2020) Learning attentive pairwise interaction for fine-grained classification. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol 34, pp 13130\u201313137","DOI":"10.1609\/aaai.v34i07.7016"},{"key":"5701_CR50","doi-asserted-by":"crossref","unstructured":"Behera A, Wharton Z, Hewage PR, Bera A (2021) Context-aware attentional pooling (cap) for fine-grained visual classification. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol 35, pp 929\u2013937","DOI":"10.1609\/aaai.v35i2.16176"},{"key":"5701_CR51","doi-asserted-by":"crossref","unstructured":"Zhang L, Huang S, Liu W, Tao D (2019) Learning a mixture of granularity-specific experts for fine-grained categorization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 8331\u20138340","DOI":"10.1109\/ICCV.2019.00842"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-023-05701-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-023-05701-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-023-05701-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T20:12:25Z","timestamp":1710274345000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-023-05701-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,18]]},"references-count":51,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2024,3]]}},"alternative-id":["5701"],"URL":"https:\/\/doi.org\/10.1007\/s11227-023-05701-6","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,10,18]]},"assertion":[{"value":"30 September 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 October 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}]}}