{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T17:19:39Z","timestamp":1780507179270,"version":"3.54.1"},"publisher-location":"Cham","reference-count":58,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030012694","type":"print"},{"value":"9783030012700","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-01270-0_49","type":"book-chapter","created":{"date-parts":[[2018,10,5]],"date-time":"2018-10-05T18:07:51Z","timestamp":1538762871000},"page":"834-850","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":304,"title":["Multi-Attention Multi-Class Constraint for Fine-grained Image Recognition"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5948-2708","authenticated-orcid":false,"given":"Ming","family":"Sun","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5251-9942","authenticated-orcid":false,"given":"Yuchen","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1132-5877","authenticated-orcid":false,"given":"Feng","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1867-5378","authenticated-orcid":false,"given":"Errui","family":"Ding","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2018,10,6]]},"reference":[{"key":"49_CR1","doi-asserted-by":"crossref","unstructured":"Bossard, L., Guillaumin, M., Gool, L.V.: Food-101 - mining discriminative components with random forests. In: ECCV (2014)","DOI":"10.1007\/978-3-319-10599-4_29"},{"key":"49_CR2","doi-asserted-by":"crossref","unstructured":"Branson, S., Van Horn, G., Belongie, S., Perona, P.: Bird species categorization using pose normalized deep convolutional nets. In: BMVC (2014)","DOI":"10.5244\/C.28.87"},{"issue":"1\u20132","key":"49_CR3","first-page":"3","volume":"108","author":"S Branson","year":"2014","unstructured":"Branson, S., Van Horn, G., Wah, C., Perona, P., Belongie, S.: The ignorant led by the blind: a hybrid human-machine vision system for fine-grained categorization. Int. J. Comput. Vis. 108(1\u20132), 3\u201329 (2014)","journal-title":"Int. J. Comput. Vis."},{"key":"49_CR4","doi-asserted-by":"crossref","unstructured":"Bromley, J., Guyon, I., LeCun, Y., S\u00e4ckinger, E., Shah, R.: Signature verification using a \u201cSiamese\" time delay neural network. In: NIPS (1994)","DOI":"10.1142\/9789812797926_0003"},{"key":"49_CR5","unstructured":"Collobert, R., Kavukcuoglu, K., Farabet, C.: Torch7: A matlab-like environment for machine learning. In: BigLearn, NIPS workshop (2011)"},{"key":"49_CR6","doi-asserted-by":"crossref","unstructured":"Cui, Y., Zhou, F., Lin, Y., Belongie, S.: Fine-grained categorization and dataset bootstrapping using deep metric learning with humans in the loop. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1153\u20131162 (2016)","DOI":"10.1109\/CVPR.2016.130"},{"key":"49_CR7","doi-asserted-by":"crossref","unstructured":"Cui, Y., Zhou, F., Wang, J., Liu, X., Lin, Y., Belongie, S.: Kernel pooling for convolutional neural networks. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.325"},{"key":"49_CR8","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: CVPR (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"49_CR9","doi-asserted-by":"crossref","unstructured":"Farrell, R., Oza, O., Zhang, N., Morariu, V.I., Darrell, T., Davis, L.S.: Birdlets: subordinate categorization using volumetric primitives and pose-normalized appearance. In: ICCV (2011)","DOI":"10.1109\/ICCV.2011.6126238"},{"key":"49_CR10","doi-asserted-by":"crossref","unstructured":"Fu, J., Zheng, H., Mei, T.: Look closer to see better: recurrent attention convolutional neural network for fine-grained image recognition. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.476"},{"key":"49_CR11","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"8","key":"49_CR12","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"49_CR13","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. arXiv preprint arXiv:1709.01507 (2017)","DOI":"10.1109\/CVPR.2018.00745"},{"key":"49_CR14","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A.: Spatial transformer networks. In: NIPS (2015)"},{"key":"49_CR15","doi-asserted-by":"crossref","unstructured":"Jia, Y., et al.: Caffe: convolutional architecture for fast feature embedding. In: ACM MM (2014)","DOI":"10.1145\/2647868.2654889"},{"key":"49_CR16","unstructured":"Khosla, A., Jayadevaprakash, N., Yao, B., Li, F.F.: Novel dataset for fine-grained image categorization: stanford dogs. In: CVPR Workshops on Fine-Grained Visual Categorization (2011)"},{"key":"49_CR17","doi-asserted-by":"crossref","unstructured":"Krause, J., Gebru, T., Deng, J., Li, L.J., Fei-Fei, L.: Learning features and parts for fine-grained recognition. In: ICPR (2014)","DOI":"10.1109\/ICPR.2014.15"},{"key":"49_CR18","doi-asserted-by":"crossref","unstructured":"Krause, J., Jin, H., Yang, J., Fei-Fei, L.: Fine-grained recognition without part annotations. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7299194"},{"key":"49_CR19","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1007\/978-3-319-46487-9_19","volume-title":"Computer Vision \u2013 ECCV 2016","author":"Jonathan Krause","year":"2016","unstructured":"Krause, J., et al.: The unreasonable effectiveness of noisy data for fine-grained recognition. In: ECCV (2016)"},{"key":"49_CR20","doi-asserted-by":"crossref","unstructured":"Krause, J., Stark, M., Deng, J., Fei-Fei, L.: 3D object representations for fine-grained categorization. In: ICCV Workshops on 3D Representation and Recognition (2013)","DOI":"10.1109\/ICCVW.2013.77"},{"key":"49_CR21","doi-asserted-by":"crossref","unstructured":"Li, Z., Yang, Y., Liu, X., Wen, S., Xu, W.: Dynamic computational time for visual attention. arXiv preprint arXiv:1703.10332 (2017)","DOI":"10.1109\/ICCVW.2017.145"},{"key":"49_CR22","doi-asserted-by":"crossref","unstructured":"Lin, D., Shen, X., Lu, C., Jia, J.: Deep LAC: deep localization, alignment and classification for fine-grained recognition. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298775"},{"key":"49_CR23","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., RoyChowdhury, A., Maji, S.: Bilinear CNN models for fine-grained visual recognition. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.170"},{"key":"49_CR24","doi-asserted-by":"crossref","unstructured":"Lin, Y., Morariu, V.I., Hsu, W.H., Davis, L.S.: Jointly optimizing 3D model fitting and fine-grained classification. In: ECCV (2014)","DOI":"10.1007\/978-3-319-10593-2_31"},{"key":"49_CR25","doi-asserted-by":"crossref","unstructured":"Liu, J., Kanazawa, A., Jacobs, D.W., Belhumeur, P.N.: Dog breed classification using part localization. In: ECCV (2012)","DOI":"10.1007\/978-3-642-33718-5_13"},{"key":"49_CR26","unstructured":"Liu, X., Xia, T., Wang, J., Yang, Y., Zhou, F., Lin, Y.: Fully convolutional attention networks for fine-grained recognition. arXiv preprint arXiv:1603.06765 (2017)"},{"key":"49_CR27","unstructured":"Mnih, V., Heess, N., Graves, A., Kavukcuoglu, K.: Recurrent models of visual attention. In: NIPS (2014)"},{"key":"49_CR28","doi-asserted-by":"crossref","unstructured":"Nilsback, M., Zisserman, A.: Automated flower classification over a large number of classes. In: ICVGIP (2008)","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"49_CR29","doi-asserted-by":"crossref","unstructured":"Parkhi, O.M., Vedaldi, A., Jawahar, C., Zisserman, A.: The truth about cats and dogs. In: ICCV (2011)","DOI":"10.1109\/ICCV.2011.6126398"},{"key":"49_CR30","doi-asserted-by":"crossref","unstructured":"Perronnin, F., Larlus, D.: Fisher vectors meet neural networks: a hybrid classification architecture. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298998"},{"key":"49_CR31","doi-asserted-by":"crossref","unstructured":"Rosenfeld, A., Ullman, S.: Visual concept recognition and localization via iterative introspection. In: ACCV (2016)","DOI":"10.1007\/978-3-319-54193-8_17"},{"key":"49_CR32","unstructured":"Salakhutdinov, R., Hinton, G.E.: Learning a nonlinear embedding by preserving class neighbourhood structure. In: AISTATS (2007)"},{"key":"49_CR33","doi-asserted-by":"crossref","unstructured":"Schroff, F., Kalenichenko, D., Philbin, J.: FaceNet: A unified embedding for face recognition and clustering. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"49_CR34","doi-asserted-by":"crossref","unstructured":"Simon, M., Gao, Y., Darrell, T., Denzler, J., Rodner, E.: Generalized orderless pooling performs implicit salient matching. arXiv preprint arXiv:1705.00487 (2017)","DOI":"10.1109\/ICCV.2017.531"},{"key":"49_CR35","doi-asserted-by":"crossref","unstructured":"Simon, M., Rodner, E.: Neural activation constellations: Unsupervised part model discovery with convolutional networks. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.136"},{"key":"49_CR36","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"49_CR37","unstructured":"Sohn, K.: Improved deep metric learning with multi-class n-pair loss objective. In: NIPS (2016)"},{"key":"49_CR38","unstructured":"Van Horn, G., et al.: The iNaturalist challenge 2017 dataset. arXiv preprint arXiv:1707.06642 (2017)"},{"key":"49_CR39","unstructured":"Wah, C., Branson, S., Welinder, P., Perona, P., Belongie, S.: The Caltech-UCSD birds-200-2011 dataset. Technical report CNS-TR-2011-001, California Institute of Technology (2011)"},{"key":"49_CR40","doi-asserted-by":"crossref","unstructured":"Wang, D., Shen, Z., Shao, J., Zhang, W., Xue, X., Zhang, Z.: Multiple granularity descriptors for fine-grained categorization. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.276"},{"key":"49_CR41","doi-asserted-by":"crossref","unstructured":"Wang, F., et al.: Residual attention network for image classification. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.683"},{"key":"49_CR42","doi-asserted-by":"crossref","unstructured":"Wang, J., Zhou, F., Wen, S., Liu, X., Lin, Y.: Deep metric learning with angular loss. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.283"},{"key":"49_CR43","doi-asserted-by":"crossref","unstructured":"Wang, J., et al.: Learning fine-grained image similarity with deep ranking. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.180"},{"key":"49_CR44","doi-asserted-by":"crossref","unstructured":"Wang, Y., Choi, J., Morariu, V., Davis, L.S.: Mining discriminative triplets of patches for fine-grained classification. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.131"},{"key":"49_CR45","doi-asserted-by":"crossref","unstructured":"Wilber, M., Kwak, I.S., Kriegman, D., Belongie, S.: Learning concept embeddings with combined human-machine expertise. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.118"},{"key":"49_CR46","doi-asserted-by":"crossref","unstructured":"Zeiler, M.D., Fergus, R.: Visualizing and understanding convolutional networks. In: ECCV (2014)","DOI":"10.1007\/978-3-319-10590-1_53"},{"key":"49_CR47","doi-asserted-by":"crossref","unstructured":"Zhang, H., et al.: SPDA-CNN: unifying semantic part detection and abstraction for fine-grained recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.129"},{"key":"49_CR48","doi-asserted-by":"crossref","unstructured":"Zhang, N., Donahue, J., Girshick, R., Darrell, T.: Part-based R-CNNs for fine-grained category detection. In: ECCV (2014)","DOI":"10.1007\/978-3-319-10590-1_54"},{"key":"49_CR49","doi-asserted-by":"crossref","unstructured":"Zhang, N., Paluri, M., Ranzato, M., Darrell, T., Bourdev, L.: Panda: Pose aligned networks for deep attribute modeling. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.212"},{"key":"49_CR50","doi-asserted-by":"crossref","unstructured":"Zhang, X., Zhou, F., Lin, Y., Zhang, S.: Embedding label structures for fine-grained feature representation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1114\u20131123 (2016)","DOI":"10.1109\/CVPR.2016.126"},{"key":"49_CR51","doi-asserted-by":"crossref","unstructured":"Zhang, X., Xiong, H., Zhou, W., Lin, W., Tian, Q.: Picking deep filter responses for fine-grained image recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.128"},{"issue":"6","key":"49_CR52","doi-asserted-by":"publisher","first-page":"1245","DOI":"10.1109\/TMM.2017.2648498","volume":"19","author":"B Zhao","year":"2017","unstructured":"Zhao, B., Wu, X., Feng, J., Peng, Q., Yan, S.: Diversified visual attention networks for fine-grained object classification. IEEE Trans. Multimed. 19(6), 1245\u20131256 (2017)","journal-title":"IEEE Trans. Multimed."},{"key":"49_CR53","doi-asserted-by":"crossref","unstructured":"Zheng, H., Fu, J., Mei, T., Luo, J.: Learning multi-attention convolutional neural network for fine-grained image recognition. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.557"},{"key":"49_CR54","unstructured":"Zhou, B., Khosla, A., Lapedriza, \u00c0., Oliva, A., Torralba, A.: Object detectors emerge in deep scene CNNs. In: ICLR (2014)"},{"key":"49_CR55","doi-asserted-by":"crossref","unstructured":"Zhou, B., Khosla, A., Lapedriza, A., Oliva, A., Torralba, A.: Learning deep features for discriminative localization. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.319"},{"key":"49_CR56","unstructured":"Zhou, B., Lapedriza, A., Xiao, J., Torralba, A., Oliva, A.: Learning deep features for scene recognition using places database. In: NIPS (2014)"},{"key":"49_CR57","doi-asserted-by":"crossref","unstructured":"Zhou, F., Lin, Y.: Fine-grained image classification by exploring bipartite-graph labels. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.127"},{"key":"49_CR58","doi-asserted-by":"crossref","unstructured":"Zhu, Y., Zhou, Y., Ye, Q., Qiu, Q., Jiao, J.: Soft proposal networks for weakly supervised object localization. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.204"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-01270-0_49","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T18:39:33Z","timestamp":1775241573000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-01270-0_49"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030012694","9783030012700"],"references-count":58,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-01270-0_49","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"6 October 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}