{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T16:29:17Z","timestamp":1778171357419,"version":"3.51.4"},"publisher-location":"Cham","reference-count":45,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031200526","type":"print"},{"value":"9783031200533","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-20053-3_4","type":"book-chapter","created":{"date-parts":[[2022,11,5]],"date-time":"2022-11-05T16:21:52Z","timestamp":1667665312000},"page":"57-73","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":20,"title":["Where to\u00a0Focus: Investigating Hierarchical Attention Relationship for\u00a0Fine-Grained Visual Classification"],"prefix":"10.1007","author":[{"given":"Yang","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lei","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pengcheng","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiao","family":"Bai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lin","family":"Gu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaohan","family":"Yu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jun","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Edwin R.","family":"Hancock","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,11,6]]},"reference":[{"key":"4_CR1","doi-asserted-by":"crossref","unstructured":"Berg, T., Belhumeur, P.N.: POOF: part-based one-vs.-one features for fine-grained categorization, face verification, and attribute estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 955\u2013962 (2013)","DOI":"10.1109\/CVPR.2013.128"},{"key":"4_CR2","doi-asserted-by":"crossref","unstructured":"Chai, Y., Lempitsky, V., Zisserman, A.: Symbiotic segmentation and part localization for fine-grained categorization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 321\u2013328 (2013)","DOI":"10.1109\/ICCV.2013.47"},{"key":"4_CR3","doi-asserted-by":"publisher","first-page":"4683","DOI":"10.1109\/TIP.2020.2973812","volume":"29","author":"D Chang","year":"2020","unstructured":"Chang, D., et al.: The devil is in the channels: mutual-channel loss for fine-grained image classification. IEEE Trans. Image Process. 29, 4683\u20134695 (2020)","journal-title":"IEEE Trans. Image Process."},{"key":"4_CR4","doi-asserted-by":"crossref","unstructured":"Chang, D., Pang, K., Zheng, Y., Ma, Z., Song, Y.Z., Guo, J.: Your \u201cflamingo\u201d is my \u201cbird\u201d: fine-grained, or not. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11476\u201311485 (2021)","DOI":"10.1109\/CVPR46437.2021.01131"},{"key":"4_CR5","doi-asserted-by":"crossref","unstructured":"Chen, T., Wu, W., Gao, Y., Dong, L., Luo, X., Lin, L.: Fine-grained representation learning and recognition by exploiting hierarchical semantic embedding. In: Proceedings of the 26th ACM International Conference on Multimedia, pp. 2023\u20132031 (2018)","DOI":"10.1145\/3240508.3240523"},{"key":"4_CR6","doi-asserted-by":"crossref","unstructured":"Chen, Y., Bai, Y., Zhang, W., Mei, T.: Destruction and construction learning for fine-grained image recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5157\u20135166 (2019)","DOI":"10.1109\/CVPR.2019.00530"},{"key":"4_CR7","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1016\/j.cviu.2017.10.001","volume":"163","author":"A Das","year":"2017","unstructured":"Das, A., Agrawal, H., Zitnick, L., Parikh, D., Batra, D.: Human attention in visual question answering: do humans and deep networks look at the same regions? Comput. Vis. Image Underst. 163, 90\u2013100 (2017)","journal-title":"Comput. Vis. Image Underst."},{"key":"4_CR8","doi-asserted-by":"publisher","first-page":"520","DOI":"10.1016\/j.neucom.2019.04.095","volume":"398","author":"S Ding","year":"2020","unstructured":"Ding, S., Qu, S., Xi, Y., Wan, S.: Stimulus-driven and concept-driven analysis for image caption generation. Neurocomputing 398, 520\u2013530 (2020)","journal-title":"Neurocomputing"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Ding, Y., Zhou, Y., Zhu, Y., Ye, Q., Jiao, J.: Selective sparse sampling for fine-grained image recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6599\u20136608 (2019)","DOI":"10.1109\/ICCV.2019.00670"},{"key":"4_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1007\/978-3-030-58565-5_10","volume-title":"Computer Vision \u2013 ECCV 2020","author":"R Du","year":"2020","unstructured":"Du, R., et al.: Fine-grained visual classification via progressive multi-granularity training of jigsaw patches. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12365, pp. 153\u2013168. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58565-5_10"},{"key":"4_CR11","doi-asserted-by":"crossref","unstructured":"Dubey, A., Gupta, O., Guo, P., Raskar, R., Farrell, R., Naik, N.: Pairwise confusion for fine-grained visual classification. In: Proceedings of the European Conference on Computer Vision, pp. 70\u201386 (2018)","DOI":"10.1007\/978-3-030-01258-8_5"},{"key":"4_CR12","doi-asserted-by":"crossref","unstructured":"Fan, D.P., Wang, W., Cheng, M.M., Shen, J.: Shifting more attention to video salient object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8554\u20138564 (2019)","DOI":"10.1109\/CVPR.2019.00875"},{"key":"4_CR13","doi-asserted-by":"crossref","unstructured":"Gao, Y., Beijbom, O., Zhang, N., Darrell, T.: Compact bilinear pooling. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 317\u2013326 (2016)","DOI":"10.1109\/CVPR.2016.41"},{"key":"4_CR14","doi-asserted-by":"crossref","unstructured":"Hou, S., Feng, Y., Wang, Z.: VegFru: a domain-specific dataset for fine-grained visual categorization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 541\u2013549 (2017)","DOI":"10.1109\/ICCV.2017.66"},{"key":"4_CR15","unstructured":"Hu, T., Qi, H., Huang, Q., Lu, Y.: See better before looking closer: weakly supervised data augmentation network for fine-grained visual classification. arXiv preprint arXiv:1901.09891 (2019)"},{"key":"4_CR16","doi-asserted-by":"crossref","unstructured":"Huang, S., Wang, X., Tao, D.: Stochastic partial swap: Enhanced model generalization and interpretability for fine-grained recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 620\u2013629 (2021)","DOI":"10.1109\/ICCV48922.2021.00066"},{"key":"4_CR17","doi-asserted-by":"crossref","unstructured":"Huang, S., Xu, Z., Tao, D., Zhang, Y.: Part-stacked CNN for fine-grained visual categorization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1173\u20131182 (2016)","DOI":"10.1109\/CVPR.2016.132"},{"key":"4_CR18","doi-asserted-by":"crossref","unstructured":"Huang, Y., Cai, M., Li, Z., Sato, Y.: Predicting gaze in egocentric video by learning task-dependent attention transition. In: Proceedings of the European Conference on Computer Vision, pp. 754\u2013769 (2018)","DOI":"10.1007\/978-3-030-01225-0_46"},{"key":"4_CR19","unstructured":"Huang, Y., et al.: Leveraging human selective attention for medical image analysis with limited training data. In: The British Machine Vision Conference (2021)"},{"key":"4_CR20","doi-asserted-by":"crossref","unstructured":"Huang, Z., Li, Y.: Interpretable and accurate fine-grained recognition via region grouping. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8662\u20138672 (2020)","DOI":"10.1109\/CVPR42600.2020.00869"},{"key":"4_CR21","doi-asserted-by":"crossref","unstructured":"Ji, R., et al.: Attention convolutional binary neural tree for fine-grained visual categorization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10468\u201310477 (2020)","DOI":"10.1109\/CVPR42600.2020.01048"},{"key":"4_CR22","doi-asserted-by":"crossref","unstructured":"Karessli, N., Akata, Z., Schiele, B., Bulling, A.: Gaze embeddings for zero-shot image classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4525\u20134534 (2017)","DOI":"10.1109\/CVPR.2017.679"},{"key":"4_CR23","doi-asserted-by":"crossref","unstructured":"Kong, S., Fowlkes, C.: Low-rank bilinear pooling for fine-grained classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 365\u2013374 (2017)","DOI":"10.1109\/CVPR.2017.743"},{"key":"4_CR24","doi-asserted-by":"crossref","unstructured":"Krause, J., Stark, M., Deng, J., Fei-Fei, L.: 3D object representations for fine-grained categorization. In: Proceedings of the IEEE International Conference on Computer Vision Workshops, pp. 554\u2013561 (2013)","DOI":"10.1109\/ICCVW.2013.77"},{"key":"4_CR25","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., RoyChowdhury, A., Maji, S.: Bilinear CNN models for fine-grained visual recognition. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1449\u20131457 (2015)","DOI":"10.1109\/ICCV.2015.170"},{"key":"4_CR26","doi-asserted-by":"crossref","unstructured":"Liu, C., Mao, J., Sha, F., Yuille, A.: Attention correctness in neural image captioning. In: Proceedings of the AAAI Conference on Artificial Intelligence (2017)","DOI":"10.1609\/aaai.v31i1.11197"},{"key":"4_CR27","doi-asserted-by":"crossref","unstructured":"Liu, Y., et al.: Goal-oriented gaze estimation for zero-shot learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3794\u20133803 (2021)","DOI":"10.1109\/CVPR46437.2021.00379"},{"key":"4_CR28","doi-asserted-by":"crossref","unstructured":"Luo, W., et al.: Cross-X learning for fine-grained visual categorization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8242\u20138251 (2019)","DOI":"10.1109\/ICCV.2019.00833"},{"key":"4_CR29","unstructured":"Maji, S., Rahtu, E., Kannala, J., Blaschko, M., Vedaldi, A.: Fine-grained visual classification of aircraft. arXiv preprint arXiv:1306.5151 (2013)"},{"key":"4_CR30","doi-asserted-by":"crossref","unstructured":"Ranasinghe, K., Naseer, M., Hayat, M., Khan, S., Khan, F.S.: Orthogonal projection loss. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12333\u201312343 (2021)","DOI":"10.1109\/ICCV48922.2021.01211"},{"key":"4_CR31","doi-asserted-by":"crossref","unstructured":"Rao, Y., Chen, G., Lu, J., Zhou, J.: Counterfactual attention learning for fine-grained visual categorization and re-identification. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1025\u20131034 (2021)","DOI":"10.1109\/ICCV48922.2021.00106"},{"key":"4_CR32","unstructured":"Rong, Y., Xu, W., Akata, Z., Kasneci, E.: Human attention in fine-grained classification. In: BMVC 2021 (2021)"},{"key":"4_CR33","unstructured":"Wah, C., Branson, S., Welinder, P., Perona, P., Belongie, S.: The caltech-UCSD birds-200-2011 dataset (2011)"},{"key":"4_CR34","doi-asserted-by":"crossref","unstructured":"Wang, S., Ouyang, X., Liu, T., Wang, Q., Shen, D.: Follow my eye: using gaze to supervise computer-aided diagnosis. IEEE Trans. Med. Imaging (2022)","DOI":"10.1109\/TMI.2022.3146973"},{"key":"4_CR35","doi-asserted-by":"crossref","unstructured":"Wang, W., Shen, J., Guo, F., Cheng, M.M., Borji, A.: Revisiting video saliency: a large-scale benchmark and a new model. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4894\u20134903 (2018)","DOI":"10.1109\/CVPR.2018.00514"},{"key":"4_CR36","doi-asserted-by":"crossref","unstructured":"Wang, Y., Morariu, V.I., Davis, L.S.: Learning a discriminative filter bank within a CNN for fine-grained recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4148\u20134157 (2018)","DOI":"10.1109\/CVPR.2018.00436"},{"key":"4_CR37","doi-asserted-by":"crossref","unstructured":"Wu, A., Liu, R., Han, Y., Zhu, L., Yang, Y.: Vector-decomposed disentanglement for domain-invariant object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9342\u20139351 (2021)","DOI":"10.1109\/ICCV48922.2021.00921"},{"key":"4_CR38","unstructured":"Xiao, T., Xu, Y., Yang, K., Zhang, J., Peng, Y., Zhang, Z.: The application of two-level attention models in deep convolutional neural network for fine-grained image classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 842\u2013850 (2015)"},{"key":"4_CR39","doi-asserted-by":"crossref","unstructured":"Yang, M., et al.: DOLG: single-stage image retrieval with deep orthogonal fusion of local and global features. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11772\u201311781 (2021)","DOI":"10.1109\/ICCV48922.2021.01156"},{"key":"4_CR40","doi-asserted-by":"crossref","unstructured":"Yang, Z., Luo, T., Wang, D., Hu, Z., Gao, J., Wang, L.: Learning to navigate for fine-grained classification. In: Proceedings of the European Conference on Computer Vision, pp. 420\u2013435 (2018)","DOI":"10.1007\/978-3-030-01264-9_26"},{"key":"4_CR41","doi-asserted-by":"crossref","unstructured":"Yu, A., Grauman, K.: Fine-grained visual comparisons with local learning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 192\u2013199 (2014)","DOI":"10.1109\/CVPR.2014.32"},{"key":"4_CR42","first-page":"12992","volume":"34","author":"Q Yu","year":"2021","unstructured":"Yu, Q., Xia, Y., Bai, Y., Lu, Y., Yuille, A.L., Shen, W.: Glance-and-gaze vision transformer. Adv. Neural Inf. Process. Syst. 34, 12992\u201313003 (2021)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"4_CR43","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1007\/978-3-319-10590-1_54","volume-title":"Computer Vision \u2013 ECCV 2014","author":"N Zhang","year":"2014","unstructured":"Zhang, N., Donahue, J., Girshick, R., Darrell, T.: Part-based R-CNNs for fine-grained category detection. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8689, pp. 834\u2013849. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10590-1_54"},{"key":"4_CR44","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Yan, K., Huang, F., Li, J.: Graph-based high-order relation discovery for fine-grained recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15079\u201315088 (2021)","DOI":"10.1109\/CVPR46437.2021.01483"},{"key":"4_CR45","doi-asserted-by":"crossref","unstructured":"Zheng, H., Fu, J., Zha, Z.J., Luo, J.: Looking for the devil in the details: learning trilinear attention sampling network for fine-grained image recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5012\u20135021 (2019)","DOI":"10.1109\/CVPR.2019.00515"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-20053-3_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,5]],"date-time":"2022-11-05T16:24:06Z","timestamp":1667665446000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-20053-3_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031200526","9783031200533"],"references-count":45,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-20053-3_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"6 November 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}