{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T19:46:17Z","timestamp":1775245577615,"version":"3.50.1"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030012182","type":"print"},{"value":"9783030012199","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-01219-9_22","type":"book-chapter","created":{"date-parts":[[2018,10,6]],"date-time":"2018-10-06T10:23:51Z","timestamp":1538821431000},"page":"365-380","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":66,"title":["Grassmann Pooling as Compact Homogeneous Bilinear Pooling for Fine-Grained Visual Classification"],"prefix":"10.1007","author":[{"given":"Xing","family":"Wei","sequence":"first","affiliation":[]},{"given":"Yue","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yihong","family":"Gong","sequence":"additional","affiliation":[]},{"given":"Jiawei","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Nanning","family":"Zheng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,10,7]]},"reference":[{"issue":"2","key":"22_CR1","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe, D.G.: Distinctive image features from scale-invariant keypoints. IJCV 60(2), 91\u2013110 (2004)","journal-title":"IJCV"},{"issue":"1","key":"22_CR2","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1023\/A:1011126920638","volume":"43","author":"T Leung","year":"2001","unstructured":"Leung, T., Malik, J.: Representing and recognizing the visual appearance of materials using three-dimensional textons. IJCV 43(1), 29\u201344 (2001)","journal-title":"IJCV"},{"key":"22_CR3","doi-asserted-by":"crossref","unstructured":"Lazebnik, S., Schmid, C., Ponce, J.: Beyond bags of features: spatial pyramid matching for recognizing natural scene categories. In: CVPR, pp. 2169\u20132178 (2006)","DOI":"10.1109\/CVPR.2006.68"},{"key":"22_CR4","doi-asserted-by":"crossref","unstructured":"Yang, J., Yu, K., Gong, Y., Huang, T.: Linear spatial pyramid matching using sparse coding for image classification. In: CVPR, pp. 1794\u20131801 (2009)","DOI":"10.1109\/CVPR.2009.5206757"},{"key":"22_CR5","doi-asserted-by":"crossref","unstructured":"Wang, J., Yang, J., Yu, K., Lv, F., Huang, T., Gong, Y.: Locality-constrained linear coding for image classification. In: CVPR, pp. 3360\u20133367 (2010)","DOI":"10.1109\/CVPR.2010.5540018"},{"key":"22_CR6","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. In: NIPS, pp. 1097\u20131105 (2012)"},{"key":"22_CR7","doi-asserted-by":"crossref","unstructured":"Krause, J., Stark, M., Deng, J., Fei-Fei, L.: 3D object representations for fine-grained categorization. In: 4th International IEEE Workshop on 3D Representation and Recognition (3dRR) (2013)","DOI":"10.1109\/ICCVW.2013.77"},{"key":"22_CR8","doi-asserted-by":"crossref","unstructured":"Yang, L., Luo, P., Change Loy, C., Tang, X.: A large-scale car dataset for fine-grained categorization and verification. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7299023"},{"key":"22_CR9","unstructured":"Wah, C., Branson, S., Welinder, P., Perona, P., Belongie, S.: The Caltech-UCSD birds-200-2011 dataset (2011)"},{"key":"22_CR10","unstructured":"Maji, S., Rahtu, E., Kannala, J., Blaschko, M., Vedaldi, A.: Fine-grained visual classification of aircraft. arXiv preprint arXiv:1306.5151 (2013)"},{"key":"22_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"446","DOI":"10.1007\/978-3-319-10599-4_29","volume-title":"Computer Vision \u2013 ECCV 2014","author":"L Bossard","year":"2014","unstructured":"Bossard, L., Guillaumin, M., Van Gool, L.: Food-101 \u2013 mining discriminative components with random forests. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8694, pp. 446\u2013461. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10599-4_29"},{"key":"22_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1007\/978-3-319-10590-1_54","volume-title":"Computer Vision \u2013 ECCV 2014","author":"N Zhang","year":"2014","unstructured":"Zhang, N., Donahue, J., Girshick, R., Darrell, T.: Part-based R-CNNs for fine-grained category detection. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8689, pp. 834\u2013849. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10590-1_54"},{"key":"22_CR13","doi-asserted-by":"crossref","unstructured":"Zhang, X., Zhou, F., Lin, Y., Zhang, S.: Embedding label structures for fine-grained feature representation. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.126"},{"key":"22_CR14","doi-asserted-by":"crossref","unstructured":"Huang, S., Xu, Z., Tao, D., Zhang, Y.: Part-stacked CNN for fine-grained visual categorization. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.132"},{"key":"22_CR15","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., et al.: Spatial transformer networks. In: NIPS, pp. 2017\u20132025 (2015)"},{"key":"22_CR16","unstructured":"Xiao, T., Xu, Y., Yang, K., Zhang, J., Peng, Y., Zhang, Z.: The application of two-level attention models in deep convolutional neural network for fine-grained image classification. In: CVPR (2015)"},{"key":"22_CR17","doi-asserted-by":"crossref","unstructured":"Zheng, H., Fu, J., Mei, T., Luo, J.: Learning multi-attention convolutional neural network for fine-grained image recognition. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.557"},{"key":"22_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1007\/978-3-319-46487-9_19","volume-title":"Computer Vision \u2013 ECCV 2016","author":"J Krause","year":"2016","unstructured":"Krause, J., et al.: The unreasonable effectiveness of noisy data for fine-grained recognition. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9907, pp. 301\u2013320. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46487-9_19"},{"key":"22_CR19","doi-asserted-by":"crossref","unstructured":"Xu, Z., Huang, S., Zhang, Y., Tao, D.: Augmenting strong supervision using web data for fine-grained categorization. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.290"},{"key":"22_CR20","doi-asserted-by":"crossref","unstructured":"Cui, Y., Zhou, F., Lin, Y., Belongie, S.: Fine-grained categorization and dataset bootstrapping using deep metric learning with humans in the loop. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.130"},{"key":"22_CR21","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., RoyChowdhury, A., Maji, S.: Bilinear CNN models for fine-grained visual recognition. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.170"},{"key":"22_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"589","DOI":"10.1007\/11744047_45","volume-title":"Computer Vision \u2013 ECCV 2006","author":"O Tuzel","year":"2006","unstructured":"Tuzel, O., Porikli, F., Meer, P.: Region covariance: a fast descriptor for detection and classification. In: Leonardis, A., Bischof, H., Pinz, A. (eds.) ECCV 2006. LNCS, vol. 3952, pp. 589\u2013600. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11744047_45"},{"key":"22_CR23","doi-asserted-by":"crossref","unstructured":"Gao, Y., Beijbom, O., Zhang, N., Darrell, T.: Compact bilinear pooling. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.41"},{"key":"22_CR24","doi-asserted-by":"crossref","unstructured":"Kong, S., Fowlkes, C.: Low-rank bilinear pooling for fine-grained classification. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.743"},{"key":"22_CR25","doi-asserted-by":"crossref","unstructured":"Li, Y., Wang, N., Liu, J., Hou, X.: Factorized bilinear models for image recognition. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.229"},{"key":"22_CR26","doi-asserted-by":"crossref","unstructured":"J\u00e9gou, H., Douze, M., Schmid, C.: On the burstiness of visual elements. In: CVPR, pp. 1169\u20131176 (2009)","DOI":"10.1109\/CVPR.2009.5206609"},{"key":"22_CR27","doi-asserted-by":"crossref","unstructured":"Arandjelovic, R., Zisserman, A.: All about VLAD. In: CVPR (2013)","DOI":"10.1109\/CVPR.2013.207"},{"issue":"9","key":"22_CR28","doi-asserted-by":"publisher","first-page":"1704","DOI":"10.1109\/TPAMI.2011.235","volume":"34","author":"H Jegou","year":"2012","unstructured":"Jegou, H., Perronnin, F., Douze, M., S\u00e1nchez, J., Perez, P., Schmid, C.: Aggregating local image descriptors into compact codes. IEEE TPAMI 34(9), 1704\u20131716 (2012)","journal-title":"IEEE TPAMI"},{"key":"22_CR29","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Maji, S.: Improved bilinear pooling with CNNs. In: BMVC (2017)","DOI":"10.5244\/C.31.117"},{"key":"22_CR30","doi-asserted-by":"crossref","unstructured":"Turaga, P., Veeraraghavan, A., Chellappa, R.: Statistical analysis on Stiefel and Grassmann manifolds with applications in computer vision. In: CVPR, pp. 1\u20138 (2008)","DOI":"10.1109\/CVPR.2008.4587733"},{"key":"22_CR31","doi-asserted-by":"crossref","unstructured":"Hamm, J., Lee, D.D.: Grassmann discriminant analysis: a unifying view on subspace-based learning. In: ICML (2008)","DOI":"10.1145\/1390156.1390204"},{"issue":"12","key":"22_CR32","doi-asserted-by":"publisher","first-page":"2464","DOI":"10.1109\/TPAMI.2015.2414422","volume":"37","author":"S Jayasumana","year":"2015","unstructured":"Jayasumana, S., Hartley, R., Salzmann, M., Li, H., Harandi, M.: Kernel methods on Riemannian manifolds with Gaussian RBF kernels. IEEE TPAMI 37(12), 2464\u20132477 (2015)","journal-title":"IEEE TPAMI"},{"key":"22_CR33","doi-asserted-by":"crossref","unstructured":"Wei, X., Zhang, Y., Gong, Y., Zheng, N.: Kernelized subspace pooling for deep local descriptors. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00200"},{"key":"22_CR34","unstructured":"Tenenbaum, J.B., Freeman, W.T.: Separating style and content. In: NIPS, pp. 662\u2013668 (1997)"},{"key":"22_CR35","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"430","DOI":"10.1007\/978-3-642-33786-4_32","volume-title":"Computer Vision \u2013 ECCV 2012","author":"J Carreira","year":"2012","unstructured":"Carreira, J., Caseiro, R., Batista, J., Sminchisescu, C.: Semantic segmentation with second-order pooling. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, vol. 7578, pp. 430\u2013443. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33786-4_32"},{"key":"22_CR36","doi-asserted-by":"crossref","unstructured":"Fukui, A., Park, D.H., Yang, D., Rohrbach, A., Darrell, T., Rohrbach, M.: Multimodal compact bilinear pooling for visual question answering and visual grounding. arXiv preprint arXiv:1606.01847 (2016)","DOI":"10.18653\/v1\/D16-1044"},{"key":"22_CR37","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"774","DOI":"10.1007\/978-3-642-33709-3_55","volume-title":"Computer Vision \u2013 ECCV 2012","author":"H J\u00e9gou","year":"2012","unstructured":"J\u00e9gou, H., Chum, O.: Negative evidences and co-occurences in image retrieval: the benefit of PCA and whitening. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, pp. 774\u2013787. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33709-3_55"},{"key":"22_CR38","unstructured":"Kar, P., Karnick, H.: Random feature maps for dot product kernels. In: Artificial Intelligence and Statistics, pp. 583\u2013591 (2012)"},{"key":"22_CR39","doi-asserted-by":"crossref","unstructured":"Pham, N., Pagh, R.: Fast and scalable polynomial kernels via explicit feature maps. In: ACM SIGKDD, pp. 239\u2013247 (2013)","DOI":"10.1145\/2487575.2487591"},{"key":"22_CR40","doi-asserted-by":"crossref","unstructured":"Arandjelovi\u0107, R., Zisserman, A.: Three things everyone should know to improve object retrieval. In: CVPR, pp. 2911\u20132918 (2012)","DOI":"10.1109\/CVPR.2012.6248018"},{"key":"22_CR41","first-page":"2579","volume":"9","author":"L Maaten","year":"2008","unstructured":"Maaten, L., Hinton, G.: Visualizing data using t-SNE. JMLR 9, 2579\u20132605 (2008)","journal-title":"JMLR"},{"key":"22_CR42","doi-asserted-by":"crossref","unstructured":"Ionescu, C., Vantzos, O., Sminchisescu, C.: Matrix backpropagation for deep networks with structured layers. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.339"},{"key":"22_CR43","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-01219-9_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T18:47:19Z","timestamp":1775242039000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-01219-9_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030012182","9783030012199"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-01219-9_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"7 October 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}