{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T02:19:39Z","timestamp":1773973179741,"version":"3.50.1"},"reference-count":51,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100005230","name":"Natural Science Foundation of Chongqing Municipality","doi-asserted-by":"publisher","award":["CSTB2024NSCQ-KJFZZDX0036"],"award-info":[{"award-number":["CSTB2024NSCQ-KJFZZDX0036"]}],"id":[{"id":"10.13039\/501100005230","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62376042"],"award-info":[{"award-number":["62376042"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.patcog.2026.113050","type":"journal-article","created":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T07:30:12Z","timestamp":1767598212000},"page":"113050","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["FG-MoE: Heterogeneous mixture of experts model for fine-grained visual classification"],"prefix":"10.1016","volume":"175","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-5735-4510","authenticated-orcid":false,"given":"Songming","family":"Yang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6225-1241","authenticated-orcid":false,"given":"Jing","family":"Wen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1955-6626","authenticated-orcid":false,"given":"Bin","family":"Fang","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2026.113050_bib0001","series-title":"Technical Report","article-title":"The Caltech-UCSD Birds-200-2011 Dataset","author":"Wah","year":"2011"},{"issue":"12","key":"10.1016\/j.patcog.2026.113050_bib0002","doi-asserted-by":"crossref","first-page":"8924","DOI":"10.1109\/TPAMI.2021.3126648","article-title":"Fine-grained image analysis with deep learning: a survey","volume":"44","author":"Wei","year":"2022","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"3","key":"10.1016\/j.patcog.2026.113050_bib0003","doi-asserted-by":"crossref","first-page":"431","DOI":"10.1007\/s11633-022-1404-6","article-title":"Parsing objects at a finer granularity: a survey","volume":"21","author":"Zhao","year":"2024","journal-title":"Mach. Intell. Res."},{"key":"10.1016\/j.patcog.2026.113050_bib0004","series-title":"International Conference on Learning Representations","article-title":"Outrageously large neural networks: the sparsely-gated mixture-of-experts layer","author":"Shazeer","year":"2017"},{"key":"10.1016\/j.patcog.2026.113050_bib0005","first-page":"8583","article-title":"Scaling vision with sparse mixture of experts","volume":"34","author":"Riquelme","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113050_bib0006","doi-asserted-by":"crossref","first-page":"748","DOI":"10.1109\/TIP.2021.3135477","article-title":"Cross-part learning for fine-grained image classification","volume":"31","author":"Liu","year":"2022","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.patcog.2026.113050_bib0007","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"1449","article-title":"CNN models for fine-grained visual recognition","author":"Lin","year":"2015"},{"key":"10.1016\/j.patcog.2026.113050_bib0008","series-title":"International Conference on Learning Representations","article-title":"An image is worth 16\u202f\u00d7\u202f16 words: transformers for image recognition at scale","author":"Dosovitskiy","year":"2021"},{"issue":"1","key":"10.1016\/j.patcog.2026.113050_bib0009","first-page":"852","article-title":"TransFG: a transformer architecture for fine-grained recognition","volume":"36","author":"He","year":"2022","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"10.1016\/j.patcog.2026.113050_bib0010","series-title":"Proceedings of the 29th ACM International Conference on Multimedia","first-page":"4239","article-title":"RAMS-trans: recurrent attention multi-scale transformer for fine-grained image recognition","author":"Hu","year":"2021"},{"key":"10.1016\/j.patcog.2026.113050_bib0011","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"10012","article-title":"Swin transformer: hierarchical vision transformer using shifted windows","author":"Liu","year":"2021"},{"key":"10.1016\/j.patcog.2026.113050_bib0012","doi-asserted-by":"crossref","first-page":"4529","DOI":"10.1109\/TIP.2024.3441813","article-title":"Multi-granularity part sampling attention for fine-grained visual classification","volume":"33","author":"Wang","year":"2024","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.patcog.2026.113050_bib0013","doi-asserted-by":"crossref","first-page":"2954","DOI":"10.1109\/TIP.2025.3567834","article-title":"Cross-level multi-instance distillation for self-supervised fine-grained visual categorization","volume":"34","author":"Bi","year":"2025","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.patcog.2026.113050_bib0014","doi-asserted-by":"crossref","first-page":"394","DOI":"10.1109\/TIP.2024.3523802","article-title":"Universal fine-grained visual categorization by concept guided learning","volume":"34","author":"Bi","year":"2025","journal-title":"IEEE Trans. Image Process."},{"issue":"1","key":"10.1016\/j.patcog.2026.113050_bib0015","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1162\/neco.1991.3.1.79","article-title":"Adaptive mixtures of local experts","volume":"3","author":"Jacobs","year":"1991","journal-title":"Neural Comput."},{"key":"10.1016\/j.patcog.2026.113050_bib0016","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"8331","article-title":"Learning a mixture of granularity-specific experts for fine-grained categorization","author":"Zhang","year":"2019"},{"key":"10.1016\/j.patcog.2026.113050_bib0017","doi-asserted-by":"crossref","first-page":"4409","DOI":"10.1109\/TMM.2021.3117064","article-title":"Enhancing mixture-of-experts by leveraging attention for fine-grained recognition","volume":"24","author":"Zhang","year":"2022","journal-title":"IEEE Trans. Multimedia"},{"key":"10.1016\/j.patcog.2026.113050_bib0018","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"11976","article-title":"A ConvNet for the 2020s","author":"Liu","year":"2022"},{"key":"10.1016\/j.patcog.2026.113050_bib0019","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He","year":"2016"},{"key":"10.1016\/j.patcog.2026.113050_bib0020","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-030-01391-2","article-title":"CBAM: convolutional block attention module, computer vision - ECCV 2018","author":"Woo","year":"2018"},{"issue":"3","key":"10.1016\/j.patcog.2026.113050_bib0021","doi-asserted-by":"crossref","first-page":"415","DOI":"10.1016\/j.neuron.2012.01.010","article-title":"How does the brain solve visual object recognition?","volume":"73","author":"Dicarlo","year":"2012","journal-title":"Neuron"},{"key":"10.1016\/j.patcog.2026.113050_bib0022","series-title":"Proc. Adv. Neural Inf. Process. Syst.","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017"},{"key":"10.1016\/j.patcog.2026.113050_bib0023","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"7132","article-title":"Squeeze-and-excitation networks","author":"Hu","year":"2018"},{"issue":"26","key":"10.1016\/j.patcog.2026.113050_bib0024","first-page":"429","article-title":"Theory of communication. part 1: the analysis of information","volume":"93","author":"Gabor","year":"1946","journal-title":"J. Inst. Electr. Eng. Part III Radio Commun. Eng."},{"key":"10.1016\/j.patcog.2026.113050_bib0025","series-title":"CVPR 2011 Workshop on Fine-Grained Visual Categorization","article-title":"Stanford dogs dataset","author":"Khosla","year":"2011"},{"key":"10.1016\/j.patcog.2026.113050_bib0026","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"595","article-title":"Building a bird recognition app and large scale dataset with citizen scientists: the fine print in fine-grained dataset collection","author":"Horn","year":"2015"},{"key":"10.1016\/j.patcog.2026.113050_bib0027","doi-asserted-by":"crossref","first-page":"1983","DOI":"10.1109\/LSP.2021.3114622","article-title":"Complemental attention multi-feature fusion network for fine-grained classification","volume":"28","author":"Miao","year":"2021","journal-title":"IEEE Signal Process. Lett."},{"key":"10.1016\/j.patcog.2026.113050_bib0028","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.111632","article-title":"Multi-granularity interaction and feature recombination network for fine-grained visual classification","volume":"166","author":"Ke","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113050_bib0029","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)","first-page":"8242","article-title":"Cross-X learning for fine-grained visual categorization","author":"Luo","year":"2019"},{"key":"10.1016\/j.patcog.2026.113050_bib0030","first-page":"11555","article-title":"Filtration and distillation: enhancing region attention for fine-grained visual categorization","volume":"34","author":"Liu","year":"2020","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"10.1016\/j.patcog.2026.113050_bib0031","first-page":"13130","article-title":"Learning attentive pairwise interaction for fine-grained classification","volume":"34","author":"Zhuang","year":"2020","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"10.1016\/j.patcog.2026.113050_bib0032","series-title":"Proc. Eur. Conf. Comput. Vis","first-page":"153","article-title":"Fine-grained visual classification via progressive multgranularity training of jigsaw patches","author":"Du","year":"2020"},{"key":"10.1016\/j.patcog.2026.113050_bib0033","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"15079","article-title":"Graph-based high-order relation discovery for fine-grained recognition","author":"Zhao","year":"2021"},{"key":"10.1016\/j.patcog.2026.113050_bib0034","series-title":"Fine-Grained Visual Categorization: A Spatial-Frequency Feature Fusion Perspective","first-page":"2798","volume":"33","author":"Wang","year":"2022"},{"issue":"12","key":"10.1016\/j.patcog.2026.113050_bib0035","doi-asserted-by":"crossref","first-page":"9521","DOI":"10.1109\/TPAMI.2021.3126668","article-title":"Progressive learning of category-consistent multi-granularity features for fine-grained visual classification","volume":"44","author":"Du","year":"2022","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.patcog.2026.113050_bib0036","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109305","article-title":"Granularity-aware distillation and structure modeling region proposal network for fine-grained image classification","volume":"137","author":"Ke","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113050_bib0037","series-title":"IEEE\/CVF International Conference on Computer Vision","first-page":"1621","article-title":"Learning gabor texture features for fine-grained recognition","author":"Zhu","year":"2023"},{"key":"10.1016\/j.patcog.2026.113050_bib0038","series-title":"International Conference on Machine Learning","article-title":"ViT-NeT: interpretable vision transformers with neural tree decoder","author":"Kim","year":"2022"},{"key":"10.1016\/j.patcog.2026.113050_bib0039","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"4682","article-title":"Dual cross-attention learning for fine-grained visual categorization and object re-identification","author":"Zhu","year":"2022"},{"key":"10.1016\/j.patcog.2026.113050_bib0040","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2022.109131","article-title":"Mix-Vit: mixing attentive vision transformer for ultra-fine-grained visual categorization","volume":"135","author":"Yu","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113050_bib0041","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109547","article-title":"AA-Trans: core attention aggregating transformer with information entropy selector for fine-Grained visual classification","volume":"140","author":"Wang","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113050_bib0042","doi-asserted-by":"crossref","first-page":"9015","DOI":"10.1109\/TMM.2023.3244340","article-title":"Fine-grained visual classification via internal ensemble learning transformer","volume":"25","author":"Xu","year":"2023","journal-title":"IEEE Trans. Multimedia"},{"key":"10.1016\/j.patcog.2026.113050_bib0043","series-title":"AAAI Conference on Artificial Intelligence","first-page":"2570","article-title":"Delving into multimodal prompting for fine-grained visual classification","author":"Jiang","year":"2024"},{"key":"10.1016\/j.patcog.2026.113050_bib0044","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.111618","article-title":"FN-NET: adaptive data augmentation network for fine-grained visual categorization","volume":"165","author":"Ye","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113050_bib0045","doi-asserted-by":"crossref","first-page":"1677","DOI":"10.1109\/TMM.2023.3238548","article-title":"TransIFC: invariant cues-aware feature concentration learning for efficient fine-grained bird image classification","volume":"27","author":"Liu","year":"2025","journal-title":"IEEE Trans. Multimedia"},{"key":"10.1016\/j.patcog.2026.113050_bib0046","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","article-title":"Large scale fine-grained categorization and domain-specific transfer learning","author":"Cui","year":"2018"},{"key":"10.1016\/j.patcog.2026.113050_bib0047","series-title":"ACM International Conference on Multimedia","first-page":"5853","article-title":"SIM-trans: structure information modeling transformer for fine-grained visual categorization, proceedings of the 30th","author":"Sun","year":"2022"},{"key":"10.1016\/j.patcog.2026.113050_bib0048","unstructured":"P.-Y. Chou, C.-H. Lin, W.-C. Kao, A novel plug-in module for fine-grained visual classification, 2022. arXiv preprint, arXiv:2202.03822."},{"key":"10.1016\/j.patcog.2026.113050_bib0049","series-title":"ICASSP 2022 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"3234","article-title":"A free lunch from ViT: adaptive attention multi-scale fusion transformer for fine-grained visual recognition","author":"Zhang","year":"2022"},{"key":"10.1016\/j.patcog.2026.113050_bib0050","doi-asserted-by":"crossref","first-page":"5993","DOI":"10.1109\/TCSVT.2025.3535818","article-title":"An attention-locating algorithm for eliminating background effects in fine-grained visual classification","volume":"35","author":"Huang","year":"2025","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.patcog.2026.113050_bib0051","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.111224","article-title":"LDH-ViT: fine-grained visual classification through local concealment and feature selection","volume":"161","author":"Shi","year":"2025","journal-title":"Pattern Recognit."}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326000130?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326000130?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T01:38:14Z","timestamp":1773970694000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320326000130"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":51,"alternative-id":["S0031320326000130"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113050","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"FG-MoE: Heterogeneous mixture of experts model for fine-grained visual classification","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113050","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"113050"}}