{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T19:42:32Z","timestamp":1776886952870,"version":"3.51.2"},"reference-count":47,"publisher":"Tsinghua University Press","issue":"4","license":[{"start":{"date-parts":[[2020,12,1]],"date-time":"2020-12-01T00:00:00Z","timestamp":1606780800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2020,10,1]],"date-time":"2020-10-01T00:00:00Z","timestamp":1601510400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Comp. Visual. Med."],"published-print":{"date-parts":[[2020,12]]},"DOI":"10.1007\/s41095-020-0184-6","type":"journal-article","created":{"date-parts":[[2020,10,2]],"date-time":"2020-10-02T03:02:58Z","timestamp":1601607778000},"page":"477-487","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":34,"title":["A new dataset of dog breed images and a benchmark for finegrained classification"],"prefix":"10.26599","volume":"6","author":[{"given":"Ding-Nan","family":"Zou","sequence":"first","affiliation":[{"name":"Department of Computer Science and Technology, BNRist, Tsinghua University, Beijing 100084, China; NaJiu Company, Hunan 410022, China"}]},{"given":"Song-Hai","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, BNRist, Tsinghua University, Beijing 100084, China"}]},{"given":"Tai-Jiang","family":"Mu","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, BNRist, Tsinghua University, Beijing 100084, China"}]},{"given":"Min","family":"Zhang","sequence":"additional","affiliation":[{"name":"Harvard Medical School, Brigham and Women's Hospital, Boston, MA 02115, USA"}]}],"member":"11138","reference":[{"key":"184_CR1","doi-asserted-by":"crossref","unstructured":"Cai, S.; Zuo, W.; Zhang, L. Higher-order integration of hierarchical convolutional activations for fine-grained visual categorization. In: Proceedings of the IEEE International Conference on Computer Vision, 511\u2013520, 2017.","DOI":"10.1109\/ICCV.2017.63"},{"key":"184_CR2","doi-asserted-by":"crossref","unstructured":"Cui, Y.; Song, Y.; Sun, C.; Howard, A.; Belongie, S. J. Large scale fine-grained categorization and domain-specific transfer learning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 4109\u20134118, 2018.","DOI":"10.1109\/CVPR.2018.00432"},{"key":"184_CR3","doi-asserted-by":"crossref","unstructured":"Wang, Y.; Morariu, V. I.; Davis, L. S. Learning a discriminative filter bank within a CNN for fine-grained recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 4148\u20134157, 2018.","DOI":"10.1109\/CVPR.2018.00436"},{"key":"184_CR4","doi-asserted-by":"publisher","first-page":"438","DOI":"10.1007\/978-3-030-01264-9_26","volume-title":"Computer Vision \u2014 ECCV 2018. Lecture Notes in Computer Science Vol. 11218","author":"Z Yang","year":"2018","unstructured":"Yang, Z.; Luo, T. G.; Wang, D.; Hu, Z. Q.; Gao, J.; Wang, L. W. Learning to navigate for fine-grained classification. In: Computer Vision \u2014 ECCV 2018. Lecture Notes in Computer Science Vol. 11218. Ferrari, V.; Hebert, M.; Sminchisescu, C.; Weiss, Y. Eds. Springer Cham, 438\u2013454, 2018."},{"key":"184_CR5","unstructured":"Khosla, A.; Jayadevaprakash, N.; Yao, B.; Li, F.-F. Novel dataset for fine-grained image categorization. In: Proceedings of the 1st Workshop on Fine-Grained Visual Categorization, IEEE Conference on Computer Vision and Pattern Recognition, 2011."},{"key":"184_CR6","unstructured":"Krizhevsky, A.; Sutskever, I.; Hinton, G. E. ImageNet classification with deep convolutional neural networks. In: Proceedings of the 25th International Conference on Neural Information Processing Systems, Vol. 1, 1097\u20131105, 2012."},{"issue":"1","key":"184_CR7","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1007\/s41095-016-0073-1","volume":"3","author":"L Chen","year":"2017","unstructured":"Chen, L.; Yang, M. Semi-supervised dictionary learning with label propagation for image classification. Computational Visual Media Vol. 3, No. 1, 83\u201394, 2017.","journal-title":"Computational Visual Media"},{"issue":"3","key":"184_CR8","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1007\/s41095-018-0119-7","volume":"4","author":"K X Chen","year":"2018","unstructured":"Chen, K. X.; Wu, X. J. Component SPD matrices: A low-dimensional discriminative data descriptor for image set classification. Computational Visual Media Vol. 4, No. 3, 245\u2013252, 2018.","journal-title":"Computational Visual Media"},{"issue":"4","key":"184_CR9","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1007\/s41095-017-0094-4","volume":"3","author":"J Y Ren","year":"2017","unstructured":"Ren, J. Y.; Wu, X. J. Vectorial approximations of infinite-dimensional covariance descriptors for image classification. Computational Visual Media Vol. 3, No. 4, 379\u2013385, 2017.","journal-title":"Computational Visual Media"},{"key":"184_CR10","unstructured":"Wah, C.; Branson, S.; Welinder, P.; Perona, P.; Belongie, S. The Caltech-UCSD Birds-200\u20132011 Dataset. Computation & Neural Systems Technical Report, CNS-TR-2011-001. California Institute of Technology, 2011."},{"key":"184_CR11","doi-asserted-by":"crossref","unstructured":"Liu, J.; Kanazawa, A.; Jacobs, D.; Belhumeur, P. Dog breed classification using part localization. In: Proceedings of the 12th European Conference on Computer Vision, Vol. Part I, 172\u2013185, 2012.","DOI":"10.1007\/978-3-642-33718-5_13"},{"key":"184_CR12","doi-asserted-by":"crossref","unstructured":"Berg, T.; Belhumeur, P. N. POOF: Part-based one-vs.-one features for fine-grained categorization, face verification, and attribute estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 955\u2013962, 2013.","DOI":"10.1109\/CVPR.2013.128"},{"key":"184_CR13","doi-asserted-by":"crossref","unstructured":"Branson, S.; Horn, G. V.; Belongie, S.; Perona, P. Bird species categorization using pose normalized deep convolutional nets. arXiv preprint arXiv:1406.2952, 2014.","DOI":"10.5244\/C.28.87"},{"key":"184_CR14","first-page":"834","volume-title":"Computer Vision-ECCV 2014. Lecture Notes in Computer Science Vol. 8689","author":"N Zhang","year":"2014","unstructured":"Zhang, N.; Donahue, J.; Girshick, R.; Darrell, T. Part-based R-CNNs for fine-grained category detection. In: Computer Vision-ECCV 2014. Lecture Notes in Computer Science Vol. 8689. Fleet, D.; Pajdla, T.; Schiele, B.; Tuytelaars, T. Eds. Springer Cham, 834\u2013849, 2014."},{"key":"184_CR15","doi-asserted-by":"crossref","unstructured":"Lin, D.; Shen, X.; Lu, C.; Jia, J. Deep LAC: Deep localization, alignment and classification for fine-grained recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 1666\u20131674, 2015.","DOI":"10.1109\/CVPR.2015.7298775"},{"key":"184_CR16","doi-asserted-by":"crossref","unstructured":"Lam, M.; Mahasseni, B.; Todorovic, S. Fine-grained recognition as HSnet search for informative image parts. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 6497\u20136506, 2017.","DOI":"10.1109\/CVPR.2017.688"},{"key":"184_CR17","doi-asserted-by":"crossref","unstructured":"Chen, Y.; Bai, Y.; Zhang, W.; Mei, T. Destruction and construction learning for finegrained image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 5157\u20135166, 2019.","DOI":"10.1109\/CVPR.2019.00530"},{"key":"184_CR18","doi-asserted-by":"crossref","unstructured":"Ge, W. F.; Lin, X. R.; Yu, Y. Z. Weakly supervised complementary parts models for fine-grained image classification from the bottom up. arXiv preprint arXiv:1903.02827, 2019.","DOI":"10.1109\/CVPR.2019.00315"},{"key":"184_CR19","doi-asserted-by":"crossref","unstructured":"Du, R. Y.; Chang, D. L.; Bhunia, A. K.; Xie, J. Y.; Ma, Z. Y.; Song, Y. Z.; Guo, J. Fine-grained visual classification via progressive multi-granularity training of jigsaw patches. arXiv preprint arXiv:2003.03836, 2020.","DOI":"10.1007\/978-3-030-58565-5_10"},{"key":"184_CR20","doi-asserted-by":"crossref","unstructured":"Zheng, H.; Fu, J.; Mei, T.; Luo, J. Learning multi-attention convolutional neural network for fine-grained image recognition. In: Proceedings of the IEEE International Conference on Computer Vision, 5219\u20135227, 2017.","DOI":"10.1109\/ICCV.2017.557"},{"key":"184_CR21","doi-asserted-by":"crossref","unstructured":"Fu, J.; Zheng, H.; Mei, T. Look closer to see better: Recurrent attention convolutional neural network for fine-grained image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 4476\u20134484, 2017.","DOI":"10.1109\/CVPR.2017.476"},{"key":"184_CR22","doi-asserted-by":"crossref","unstructured":"Zheng, H.; Fu, J.; Zha, Z.; Luo, J.; Looking for the devil in the details: Learning trilinear attention sampling network for fine-grained image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 5012\u20135021, 2019.","DOI":"10.1109\/CVPR.2019.00515"},{"key":"184_CR23","unstructured":"Zhang, F.; Li, M.; Zhai, G.; Liu, Y. Three-branch and multi-scale learning for fine-grained image recognition (TBMSL-Net). arXiv preprint arXiv:2003.09150, 2020."},{"key":"184_CR24","doi-asserted-by":"crossref","unstructured":"Sun, G. L.; Cholakkal, H.; Khan, S.; Khan, F. S.; Shao, L. Fine-grained recognition: Accounting for subtle differences between similar classes. arXiv preprint arXiv:1912.06842, 2019.","DOI":"10.1609\/aaai.v34i07.6882"},{"key":"184_CR25","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y.; RoyChowdhury, A.; Maji, S. Bilinear CNN models for fine-grained visual recognition. In: Proceedings of the IEEE international conference on computer vision, 1449\u20131457, 2015.","DOI":"10.1109\/ICCV.2015.170"},{"key":"184_CR26","doi-asserted-by":"crossref","unstructured":"Gao, Y.; Beijbom, O.; Zhang, N.; Darrell, T. Compact bilinear pooling. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 317\u2013326, 2016.","DOI":"10.1109\/CVPR.2016.41"},{"key":"184_CR27","doi-asserted-by":"publisher","first-page":"595","DOI":"10.1007\/978-3-030-01270-0_35","volume-title":"Computer Vision-ECCV 2018. Lecture Notes in Computer Science Vol. 11220","author":"C Yu","year":"2018","unstructured":"Yu, C.; Zhao, X.; Zheng, Q.; Zhang, P.; You, X. Hierarchical bilinear pooling for fine-grained visual recognition. In: Computer Vision-ECCV 2018. Lecture Notes in Computer Science Vol. 11220. Ferrari, V.; Hebert, M.; Sminchisescu, C.; Weiss, Y. Eds. Springer Cham, 595\u2013610, 2018."},{"key":"184_CR28","doi-asserted-by":"crossref","unstructured":"Wang, Y.; Choi, J.; Morariu, V. I.; Davis, L. S. Mining discriminative triplets of patches for fine-grained classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 1163\u20131172, 2016.","DOI":"10.1109\/CVPR.2016.131"},{"key":"184_CR29","doi-asserted-by":"crossref","unstructured":"Zhang, X.; Zhou, F.; Lin, Y.; Zhang, S. Embedding label structures for finegrained feature representation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 1114\u20131123, 2016.","DOI":"10.1109\/CVPR.2016.126"},{"key":"184_CR30","unstructured":"Dubey, A.; Gupta, O.; Raskar, R.; Naik, N. Maximum-entropy fine grained classification. arXiv preprint arXiv:1809.05934, 2018."},{"key":"184_CR31","unstructured":"Qian, Q.; Jin, R.; Zhu, S.; Lin, Y. Fine-grained visual categorization via multi-stage metric learning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 3716\u20133724, 2015."},{"key":"184_CR32","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1007\/978-3-030-01270-0_49","volume-title":"Computer Vision-ECCV 2018. Lecture Notes in Computer Science Vol. 11220","author":"M Sun","year":"2018","unstructured":"Sun, M.; Yuan, Y.; Zhou, F.; Ding, E. Multi-attention multi-class constraint for fine-grained image recognition. In: Computer Vision-ECCV 2018. Lecture Notes in Computer Science Vol. 11220. Ferrari, V.; Hebert, M.; Sminchisescu, C.; Weiss, Y. Eds. Springer Cham, 834\u2013850, 2018."},{"key":"184_CR33","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1007\/978-3-030-01258-8_5","volume-title":"Computer Vision-ECCV 2018. Lecture Notes in Computer Science Vol. 11216","author":"A Dubey","year":"2018","unstructured":"Dubey, A.; Gupta, O.; Guo, P.; Raskar, R.; Farrell, R.; Naik, N. Pairwise confusion for fine-grained visual classification. In: Computer Vision-ECCV 2018. Lecture Notes in Computer Science Vol. 11216. Ferrari, V.; Hebert, M.; Sminchisescu, C.; Weiss, Y. Eds. Springer Cham, 71\u201388, 2018."},{"key":"184_CR34","doi-asserted-by":"crossref","unstructured":"Zhuang, P.; Wang, Y.; Qiao, Y. Learning attentive pairwise interaction for fine-grained classification. arXiv preprint arXiv:2002.10191, 2020.","DOI":"10.1609\/aaai.v34i07.7016"},{"key":"184_CR35","doi-asserted-by":"crossref","unstructured":"Xu, Z.; Huang, S.; Zhang, Y.; Tao, D. Augmenting strong supervision using web data for finegrained categorization. In: Proceedings of the IEEE International Conference on Computer Vision, 2524\u20132532, 2015.","DOI":"10.1109\/ICCV.2015.290"},{"key":"184_CR36","unstructured":"Niu, L.; Veeraraghavan, A.; Sabharwal, A. Finegrained classification using heterogeneous web data and auxiliary categories. arXiv preprint arXiv:1811.07567, 2018."},{"key":"184_CR37","doi-asserted-by":"crossref","unstructured":"Torralba, A.; Efros, A. A. Unbiased look at dataset bias. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 1521\u20131528, 2011.","DOI":"10.1109\/CVPR.2011.5995347"},{"key":"184_CR38","unstructured":"Hu, T.; Qi, H. G.; Huang, Q. M.; Lu, Y. See better before looking closer: Weakly supervised data augmentation network for fine-grained visual classification. arXiv preprint arXiv:1901.09891, 2019."},{"key":"184_CR39","doi-asserted-by":"crossref","unstructured":"Krause, J.; Stark, M.; Deng, J.; L. Fei-Fei. 3D object representations for fine-grained categorization. In: Proceedings of the IEEE International Conference on Computer Vision Workshops, 554\u2013561, 2013.","DOI":"10.1109\/ICCVW.2013.77"},{"key":"184_CR40","unstructured":"Maji, S.; Rahtu, E.; Kannala, J.; Blaschko, M.; Vedaldi, A. Fine-grained visual classification of aircraft. arXiv preprint arXiv:1306.5151, 2013."},{"key":"184_CR41","doi-asserted-by":"crossref","unstructured":"Nilsback, M.; Zisserman, A. Automated flower classification over a large number of classes. In: Proceedings of the 6th Indian Conference on Computer Vision, Graphics & Image Processing, 722\u2013729, 2008.","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"184_CR42","doi-asserted-by":"crossref","unstructured":"Deng, J.; Dong, W.; Socher, R.; Li, L.; Li, K.; Fei-Fei, L. ImageNet: A large-scale hierarchical image database. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 248\u2013255, 2009.","DOI":"10.1109\/CVPR.2009.5206848"},{"issue":"2","key":"184_CR43","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M.; van Gool, L.; Williams, C. K. I.; Winn, J.; Zisserman, A. The pascal visual object classes (VOC) challenge. International Journal of Computer-Vision Vol. 88, No. 2, 303\u2013338, 2010.","journal-title":"International Journal of Computer-Vision"},{"key":"184_CR44","doi-asserted-by":"crossref","unstructured":"Lin, T.; Maire, M.; Belongie, S.; Bourdev, L.; Girshick, R.; Hays, J.; Perona, P.; Ramanan, D.; Zitnick, C. L.; Doll\u00e1r, P. Microsoft COCO: Common objects in context. arXiv preprint arXiv:1405.0312, 2014.","DOI":"10.1007\/978-3-319-10602-1_48"},{"issue":"4","key":"184_CR45","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z.; Bovik, A. C.; Sheikh, H. R.; Simoncelli, E. P. Image quality assessment: From error visibility to structural similarity. IEEE Transactions on Image Processing Vol. 13, No. 4, 600\u2013612, 2004.","journal-title":"IEEE Transactions on Image Processing"},{"issue":"1\u20133","key":"184_CR46","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1007\/s11263-007-0090-8","volume":"77","author":"B C Russell","year":"2008","unstructured":"Russell, B. C.; Torralba, A.; Murphy, K. P.; Freeman, W. T. LabelMe: A database and web-based tool for image annotation. International Journal of Computer-Vision Vol. 77, Nos. 1\u20133, 157\u2013173, 2008.","journal-title":"International Journal of Computer-Vision"},{"key":"184_CR47","doi-asserted-by":"crossref","unstructured":"Huang, G.; Liu, Z.; van der Maaten, L.; Weinberger, K. Q. Densely connected convolutional networks. arXiv preprint arXiv:1608.06993, 2016.","DOI":"10.1109\/CVPR.2017.243"}],"container-title":["Computational Visual Media"],"original-title":[],"link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41095-020-0184-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s41095-020-0184-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41095-020-0184-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10750449\/10897327\/10897337.pdf?arnumber=10897337","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T18:38:15Z","timestamp":1762367895000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10897337\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,12]]},"references-count":47,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1007\/s41095-020-0184-6","relation":{},"ISSN":["2096-0662","2096-0433"],"issn-type":[{"value":"2096-0662","type":"electronic"},{"value":"2096-0433","type":"print"}],"subject":[],"published":{"date-parts":[[2020,12]]},"assertion":[{"value":"18 May 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 June 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 October 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}