{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T20:43:28Z","timestamp":1761597808478,"version":"3.37.3"},"reference-count":98,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2017,3,30]],"date-time":"2017-03-30T00:00:00Z","timestamp":1490832000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Basic Research Program of China (973 Program)","doi-asserted-by":"crossref","award":["2012CB316302"],"award-info":[{"award-number":["2012CB316302"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Strategic Priority Research Program of the CAS","award":["XDA06040102"],"award-info":[{"award-number":["XDA06040102"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China (CN)","doi-asserted-by":"publisher","award":["61403380"],"award-info":[{"award-number":["61403380"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2017,9]]},"DOI":"10.1007\/s11263-017-1007-9","type":"journal-article","created":{"date-parts":[[2017,3,30]],"date-time":"2017-03-30T22:32:29Z","timestamp":1490913149000},"page":"145-168","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":24,"title":["SDE: A Novel Selective, Discriminative and Equalizing Feature Representation for Visual Recognition"],"prefix":"10.1007","volume":"124","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5995-1391","authenticated-orcid":false,"given":"Guo-Sen","family":"Xie","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xu-Yao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuicheng","family":"Yan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cheng-Lin","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,3,30]]},"reference":[{"key":"1007_CR1","unstructured":"Berg, T., & Belhumeur, P. N. (2013). Poof: Part-based one-vs.-one features for fine-grained categorization, face verification, and attribute estimation. In 2013 IEEE conference on computer vision and pattern recognition (CVPR) (pp. 955\u2013962)."},{"key":"1007_CR2","doi-asserted-by":"crossref","unstructured":"Bo, L., Ren, X., & Fox, D. (2013). Multipath sparse coding using hierarchical matching pursuit. In CVPR.","DOI":"10.1109\/CVPR.2013.91"},{"key":"1007_CR3","doi-asserted-by":"crossref","unstructured":"Boureau, Y. L., Bach, F., LeCun, Y., & Ponce, J. (2010). Learning mid-level features for recognition. In 2010 IEEE conference on computer vision and pattern recognition (CVPR) (pp. 2559\u20132566).","DOI":"10.1109\/CVPR.2010.5539963"},{"key":"1007_CR4","unstructured":"Bradley, D. M., & Bagnell, J. A. (2008). Differential sparse coding. In Neural information processing systems."},{"key":"1007_CR5","doi-asserted-by":"crossref","unstructured":"Chai, Y., Lempitsky, V., & Zisserman, A. (2013). Symbiotic segmentation and part localization for fine-grained categorization. In 2013 IEEE international conference on computer vision (ICCV) (pp. 321\u2013328).","DOI":"10.1109\/ICCV.2013.47"},{"key":"1007_CR6","unstructured":"Chatfield, K., Simonyan, K., Vedaldi, A., & Zisserman, A. (2014). Return of the devil in the details: Delving deep into convolutional nets. ArXiv preprint arXiv:1405.3531 ."},{"issue":"1","key":"1007_CR7","doi-asserted-by":"crossref","first-page":"13","DOI":"10.1109\/TPAMI.2014.2343217","volume":"37","author":"Q Chen","year":"2015","unstructured":"Chen, Q., Song, Z., Dong, J., Huang, Z., Hua, Y., & Yan, S. (2015). Contextualizing object detection and classification. IEEE Transactions on Pattern Analysis and Machine Intelligence, 37(1), 13\u201327.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1007_CR8","doi-asserted-by":"crossref","unstructured":"Chen, Q., Song, Z., Hua, Y., Huang, Z., & Yan, S. (2012). Hierarchical matching with side information for image classification. In 2012 IEEE conference on computer vision and pattern recognition (CVPR) (pp. 3426\u20133433).","DOI":"10.1109\/CVPR.2012.6248083"},{"issue":"1","key":"1007_CR9","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1007\/s11263-015-0872-3","volume":"118","author":"M Cimpoi","year":"2016","unstructured":"Cimpoi, M., Maji, S., Kokkinos, I., & Vedaldi, A. (2016). Deep filter banks for texture recognition, description, and segmentation. International Journal of Computer Vision, 118(1), 65\u201394.","journal-title":"International Journal of Computer Vision"},{"key":"1007_CR10","doi-asserted-by":"crossref","unstructured":"Cimpoi, M., Maji, S., & Vedaldi, A. (2015) Deep filter banks for texture recognition and segmentation. In CVPR.","DOI":"10.1109\/CVPR.2015.7299007"},{"issue":"1","key":"1007_CR11","doi-asserted-by":"crossref","first-page":"235","DOI":"10.1007\/s10479-007-0176-2","volume":"153","author":"B Colson","year":"2007","unstructured":"Colson, B., Marcotte, P., & Savard, G. (2007). An overview of bilevel optimization. Annals of Operations Research, 153(1), 235\u2013256.","journal-title":"Annals of Operations Research"},{"key":"1007_CR12","unstructured":"Csurka, G., Dance, C., Fan, L., Willamowski, J., & Bray, C. (2004) Visual categorization with bags of keypoints. In ECCV."},{"key":"1007_CR13","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L. J., Li, K., & Fei-Fei, L. (2009). ImageNet: A Large-Scale Hierarchical Image Database. In CVPR09.","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1007_CR14","unstructured":"Doersch, C., Gupta, A., & Efros, A. A. (2013). Mid-level visual element discovery as discriminative mode seeking. In NIPS."},{"key":"1007_CR15","unstructured":"Donahue, J., Jia, Y., Vinyals, O., Hoffman, J., Zhang, N., Tzeng, E., & Darrell, T. (2013). Decaf: A deep convolutional activation feature for generic visual recognition. arXiv:1310.1531 ."},{"key":"1007_CR16","doi-asserted-by":"crossref","unstructured":"Dong, J., Xia, W., Chen, Q., Feng, J., Huang, Z., & Yan, S. (2013). Subcategory-aware object classification. In 2013 IEEE conference on computer vision and pattern recognition (CVPR) (pp. 827\u2013834).","DOI":"10.1109\/CVPR.2013.112"},{"issue":"2","key":"1007_CR17","doi-asserted-by":"crossref","first-page":"407","DOI":"10.1214\/009053604000000067","volume":"32","author":"B Efron","year":"2004","unstructured":"Efron, B., Hastie, T., Johnstone, I., Tibshirani, R., et al. (2004). Least angle regression. The Annals of Statistics, 32(2), 407\u2013499.","journal-title":"The Annals of Statistics"},{"issue":"1","key":"1007_CR18","doi-asserted-by":"crossref","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2014","unstructured":"Everingham, M., Eslami, S. A., Van Gool, L., Williams, C. K., Winn, J., & Zisserman, A. (2014). The pascal visual object classes challenge: A retrospective. International Journal of Computer Vision, 111(1), 98\u2013136.","journal-title":"International Journal of Computer Vision"},{"issue":"2","key":"1007_CR19","doi-asserted-by":"crossref","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C. K., Winn, J., & Zisserman, A. (2010). The pascal visual object classes (voc) challenge. International Journal of Computer Vision, 88(2), 303\u2013338.","journal-title":"International Journal of Computer Vision"},{"key":"1007_CR20","first-page":"1871","volume":"9","author":"RE Fan","year":"2008","unstructured":"Fan, R. E., Chang, K. W., Hsieh, C. J., Wang, X. R., & Lin, C. J. (2008). Liblinear: A library for large linear classification. JMLR, 9, 1871\u20131874.","journal-title":"JMLR"},{"key":"1007_CR21","doi-asserted-by":"crossref","unstructured":"Fanello, S. R., Noceti, N., Ciliberto, C., Metta, G., & Odone, F. (2014). Ask the image: Supervised pooling to preserve feature locality. In CVPR.","DOI":"10.1109\/CVPR.2014.114"},{"issue":"1","key":"1007_CR22","first-page":"59","volume":"106","author":"L Fei-Fei","year":"2007","unstructured":"Fei-Fei, L., Fergus, R., & Perona, P. (2007). Learning generative visual models from few training examples: An incremental bayesian approach tested on 101 object categories. CVIU, 106(1), 59\u201370.","journal-title":"CVIU"},{"key":"1007_CR23","unstructured":"Feng, J., Ni, B., Tian, Q., & Yan, S. (2011). Geometric $$\\ell $$ \u2113 p-norm feature pooling for image classification. In CVPR."},{"key":"1007_CR24","doi-asserted-by":"crossref","unstructured":"Fernando, B., Fromont, E., & Tuytelaars, T. (2012). Effective use of frequent itemset mining for image classification. In Computer vision\u2014ECCV 2012 (pp. 214\u2013227).","DOI":"10.1007\/978-3-642-33718-5_16"},{"issue":"1","key":"1007_CR25","doi-asserted-by":"crossref","first-page":"92","DOI":"10.1109\/TPAMI.2012.63","volume":"35","author":"S Gao","year":"2013","unstructured":"Gao, S., Tsang, I. W. H., & Chia, L. T. (2013). Laplacian sparse coding, hypergraph laplacian sparse coding, and applications. IEEE Transactions on Pattern Analysis and Machine Intelligence, 35(1), 92\u2013104.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1007_CR26","doi-asserted-by":"crossref","unstructured":"Gavves, E., Fernando, B., Snoek, C. G., Smeulders, A. W., & Tuytelaars, T. (2013). Fine-grained categorization by alignments. In 2013 IEEE international conference on computer vision (ICCV) (pp. 1713\u20131720).","DOI":"10.1109\/ICCV.2013.215"},{"key":"1007_CR27","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., & Malik, J. (2014). Rich feature hierarchies for accurate object detection and semantic segmentation. In CVPR.","DOI":"10.1109\/CVPR.2014.81"},{"key":"1007_CR28","doi-asserted-by":"crossref","unstructured":"Gong, Y., Wang, L., Guo, R., & Lazebnik, S. (2014). Multi-scale orderless pooling of deep convolutional activation features. In ECCV.","DOI":"10.1007\/978-3-319-10584-0_26"},{"key":"1007_CR29","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2014). Spatial pyramid pooling in deep convolutional networks for visual recognition. arXiv:1406.4729 ."},{"key":"1007_CR30","unstructured":"Jaderberg, M., Simonyan, K., & Zisserman, A., et\u00a0al. (2015). Spatial transformer networks. In Advances in neural information processing systems (pp. 2017\u20132025)."},{"key":"1007_CR31","doi-asserted-by":"crossref","unstructured":"J\u00e9gou, H., Douze, M., Schmid, C., & P\u00e9rez, P. (2010). Aggregating local descriptors into a compact image representation. In 2010 IEEE conference on computer vision and pattern recognition (CVPR) (pp. 3304\u20133311).","DOI":"10.1109\/CVPR.2010.5540039"},{"key":"1007_CR32","doi-asserted-by":"crossref","unstructured":"J\u00e9gou, H., & Zisserman, A. (2014). Triangulation embedding and democratic aggregation for image search. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3310\u20133317).","DOI":"10.1109\/CVPR.2014.417"},{"key":"1007_CR33","unstructured":"Jia, Y., Shelhamer, E., Donahue, J., Karayev, S., Long, J., Girshick, R., Guadarrama, S., & Darrell, T. (2014). Caffe: Convolutional architecture for fast feature embedding. arXiv:1408.5093 ."},{"key":"1007_CR34","doi-asserted-by":"crossref","unstructured":"Jiang, Y., Yuan, J., & Yu, G. (2012). Randomized spatial partition for scene recognition. In Computer vision\u2014ECCV 2012 (pp. 730\u2013743).","DOI":"10.1007\/978-3-642-33709-3_52"},{"issue":"11","key":"1007_CR35","doi-asserted-by":"crossref","first-page":"2651","DOI":"10.1109\/TPAMI.2013.88","volume":"35","author":"Z Jiang","year":"2013","unstructured":"Jiang, Z., Lin, Z., & Davis, L. S. (2013). Label consistent k-svd: Learning a discriminative dictionary for recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence, 35(11), 2651\u20132664.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1007_CR36","doi-asserted-by":"crossref","unstructured":"Juneja, M., Vedaldi, A., Jawahar, C., & Zisserman, A. (2013). Blocks that shout: Distinctive parts for scene classification. In CVPR (pp. 923\u2013930).","DOI":"10.1109\/CVPR.2013.124"},{"key":"1007_CR37","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. In NIPS."},{"key":"1007_CR38","doi-asserted-by":"crossref","unstructured":"Kwitt, R., Vasconcelos, N., & Rasiwasia, N. (2012). Scene recognition on the semantic manifold. In Computer vision\u2014ECCV 2012 (pp. 359\u2013372).","DOI":"10.1007\/978-3-642-33765-9_26"},{"key":"1007_CR39","doi-asserted-by":"crossref","unstructured":"Lazebnik, S., Schmid, C., & Ponce, J. (2006). Beyond bags of features: Spatial pyramid matching for recognizing natural scene categories. In CVPR.","DOI":"10.1109\/CVPR.2006.68"},{"issue":"11","key":"1007_CR40","doi-asserted-by":"crossref","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun, Y., Bottou, L., Bengio, Y., & Haffner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86(11), 2278\u20132324.","journal-title":"Proceedings of the IEEE"},{"key":"1007_CR41","unstructured":"Li, L. J., Su, H., Fei-Fei, L., & Xing, E. P. (2010). Object bank: A high-level image representation for scene classification & semantic feature sparsification. In Advances in neural information processing systems (pp. 1378\u20131386)."},{"key":"1007_CR42","doi-asserted-by":"crossref","unstructured":"Li, Q., Wu, J., & Tu, Z. (2013). Harvesting mid-level visual concepts from large-scale internet images. In 2013 IEEE conference on computer vision and pattern recognition (CVPR) (pp. 851\u2013858).","DOI":"10.1109\/CVPR.2013.115"},{"key":"1007_CR43","doi-asserted-by":"crossref","unstructured":"Lin, D., Lu, C., Liao, R., & Jia, J. (2014). Learning important spatial pooling regions for scene classification. In CVPR.","DOI":"10.1109\/CVPR.2014.476"},{"key":"1007_CR44","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., RoyChowdhury, A., & Maji, S. (2015). Bilinear CNN models for fine-grained visual recognition. In Proceedings of the IEEE international conference on computer vision (pp. 1449\u20131457).","DOI":"10.1109\/ICCV.2015.170"},{"key":"1007_CR45","unstructured":"Liu, L., Wang, L., & Liu, X. (2011). In defense of soft-assignment coding. In ICCV."},{"key":"1007_CR46","unstructured":"Long, J., Shelhamer, E., & Darrell, T. (2014). Fully convolutional networks for semantic segmentation. ArXiv preprint arXiv:1411.4038 ."},{"issue":"2","key":"1007_CR47","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe, D. G. (2004). Distinctive image features from scale-invariant keypoints. IJCV, 60(2), 91\u2013110.","journal-title":"IJCV"},{"issue":"4","key":"1007_CR48","doi-asserted-by":"crossref","first-page":"791","DOI":"10.1109\/TPAMI.2011.156","volume":"34","author":"J Mairal","year":"2012","unstructured":"Mairal, J., Bach, F., & Ponce, J. (2012). Task-driven dictionary learning. IEEE Transactions on Pattern Analysis and Machine Intelligence, 34(4), 791\u2013804.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1007_CR49","first-page":"19","volume":"11","author":"J Mairal","year":"2010","unstructured":"Mairal, J., Bach, F., Ponce, J., & Sapiro, G. (2010). Online learning for matrix factorization and sparse coding. The Journal of Machine Learning Research, 11, 19\u201360.","journal-title":"The Journal of Machine Learning Research"},{"key":"1007_CR50","doi-asserted-by":"crossref","unstructured":"Murray, N., & Perronnin, F. (2014). Generalized max pooling. In CVPR.","DOI":"10.1109\/CVPR.2014.317"},{"key":"1007_CR51","unstructured":"Nie, F., Huang, H., Cai, X., & Ding, C. H. (2010). Efficient and robust feature selection via joint $$\\ell $$ \u2113 2, 1-norms minimization. In Advances in neural information processing systems (pp. 1813\u20131821)."},{"key":"1007_CR52","doi-asserted-by":"crossref","unstructured":"Nilsback, M. E., & Zisserman, A. (2006). A visual vocabulary for flower classification. In CVPR.","DOI":"10.1109\/CVPR.2006.42"},{"key":"1007_CR53","doi-asserted-by":"crossref","unstructured":"Oquab, M., Bottou, L., Laptev, I., & Sivic, J. (2014). Learning and transferring mid-level image representations using convolutional neural networks. In 2014 IEEE conference on computer vision and pattern recognition (CVPR) (pp. 1717\u20131724).","DOI":"10.1109\/CVPR.2014.222"},{"key":"1007_CR54","doi-asserted-by":"crossref","unstructured":"Pandey, M., & Lazebnik, S. (2011). Scene recognition and weakly supervised object localization with deformable part-based models. In ICCV (pp. 1307\u20131314).","DOI":"10.1109\/ICCV.2011.6126383"},{"key":"1007_CR55","doi-asserted-by":"crossref","unstructured":"Parizi, S. N., Oberlin, J. G., & Felzenszwalb, P. F. (2012). Reconfigurable models for scene recognition. In 2012 IEEE conference on computer vision and pattern recognition (CVPR) (pp. 2775\u20132782).","DOI":"10.1109\/CVPR.2012.6248001"},{"key":"1007_CR56","doi-asserted-by":"crossref","unstructured":"Perronnin, F., & Dance, C. (2007). Fisher kernels on visual vocabularies for image categorization. In CVPR.","DOI":"10.1109\/CVPR.2007.383266"},{"key":"1007_CR57","doi-asserted-by":"crossref","unstructured":"Perronnin, F., S\u00e1nchez, J., & Mensink, T. (2010). Improving the fisher kernel for large-scale image classification. In Computer vision\u2014ECCV 2010 (pp. 143\u2013156).","DOI":"10.1007\/978-3-642-15561-1_11"},{"key":"1007_CR58","doi-asserted-by":"crossref","unstructured":"Quattoni, A., & Torralba, A. (2009). Recognizing indoor scenes. In CVPR.","DOI":"10.1109\/CVPR.2009.5206537"},{"key":"1007_CR59","doi-asserted-by":"crossref","unstructured":"Razavian, A. S., Azizpour, H., Sullivan, J., & Carlsson, S. (2014). CNN features off-the-shelf: An astounding baseline for recognition. In CVPR.","DOI":"10.1109\/CVPRW.2014.131"},{"key":"1007_CR60","doi-asserted-by":"crossref","unstructured":"Sadeghi, F., & Tappen, M. F. (2012). Latent pyramidal regions for recognizing scenes. In Computer vision\u2014ECCV 2012 (pp. 228\u2013241).","DOI":"10.1007\/978-3-642-33715-4_17"},{"key":"1007_CR61","doi-asserted-by":"crossref","unstructured":"Shabou, A., & LeBorgne, H. (2012). Locality-constrained and spatially regularized coding for scene categorization. In 2012 IEEE conference on computer vision and pattern recognition (CVPR) (pp. 3618\u20133625).","DOI":"10.1109\/CVPR.2012.6248107"},{"key":"1007_CR62","doi-asserted-by":"crossref","unstructured":"Shao, M., Li, S., Liu, T., Tao, D., Huang, T. S., & Fu, Y. (2014). Learning relative features through adaptive pooling for image classification. In 2014 IEEE international conference on multimedia and expo (ICME) (pp. 1\u20136).","DOI":"10.1109\/ICME.2014.6890269"},{"key":"1007_CR63","unstructured":"Sharma, G., Jurie, F., & Schmid, C. (2012). Discriminative Spatial Saliency for Image Classification. In CVPR 2012\u2014Conference on computer vision and pattern recognition (pp. 3506\u20133513). IEEE, Providence, Rhode Island, United States. https:\/\/hal.inria.fr\/hal-00714311 ."},{"key":"1007_CR64","unstructured":"Simonyan, K., & Zisserman, A. (2015). Very deep convolutional networks for large-scale image recognition. In ICLR."},{"key":"1007_CR65","doi-asserted-by":"crossref","unstructured":"Singh, S., Gupta, A., & Efros, A. A. (2012). Unsupervised discovery of mid-level discriminative patches. In ECCV (pp. 73\u201386).","DOI":"10.1007\/978-3-642-33709-3_6"},{"key":"1007_CR66","doi-asserted-by":"crossref","unstructured":"Sun, J., & Ponce, J. (2013). Learning discriminative part detectors for image classification and cosegmentation. In 2013 IEEE international conference on computer vision (ICCV) (pp. 3400\u20133407). IEEE.","DOI":"10.1109\/ICCV.2013.422"},{"key":"1007_CR67","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Liu, W., Jia, Y., Sermanet, P., Reed, S., Anguelov, D., Erhan, D., Vanhoucke, V., & Rabinovich, A. (2015). Going deeper with convolutions. In CVPR.","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"1007_CR68","doi-asserted-by":"crossref","unstructured":"van Gemert, J. C., Geusebroek, J. M., Veenman, C. J., & Smeulders, A. W. (2008). Kernel codebooks for scene categorization. In Computer vision\u2014ECCV 2008 (pp. 696\u2013709).","DOI":"10.1007\/978-3-540-88690-7_52"},{"key":"1007_CR69","unstructured":"Vedaldi, A., & Lenc, K. (2014). Matconvnet-convolutional neural networks for matlab. ArXiv preprint arXiv:1412.4564 ."},{"key":"1007_CR70","unstructured":"Wah, C., Branson, S., Welinder, P., Perona, P., & Belongie, S. (2011). The Caltech-UCSD Birds-200-2011 Dataset. Tech. Rep. CNS-TR-2011-001, California Institute of Technology."},{"key":"1007_CR71","doi-asserted-by":"crossref","unstructured":"Wang, J., Yang, J., Yu, K., Lv, F., Huang, T., & Gong, Y. (2010). Locality-constrained linear coding for image classification. In CVPR.","DOI":"10.1109\/CVPR.2010.5540018"},{"key":"1007_CR72","unstructured":"Wang, X., Wang, B., Bai, X., Liu, W., & Tu, Z. (2013). Max-margin multiple-instance dictionary learning. In ICML (pp. 846\u2013854)."},{"key":"1007_CR73","unstructured":"Wei, Y., Xia, W., Huang, J., Ni, B., Dong, J., Zhao, Y., & Yan, S. (2014). CNN: Single-label to multi-label. ArXiv preprint arXiv:1406.5726 ."},{"issue":"8","key":"1007_CR74","doi-asserted-by":"crossref","first-page":"1489","DOI":"10.1109\/TPAMI.2010.224","volume":"33","author":"J Wu","year":"2011","unstructured":"Wu, J., & Rehg, J. M. (2011). Centrist: A visual descriptor for scene categorization. IEEE Transactions on Pattern Analysis and Machine Intelligence, 33(8), 1489\u20131501.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"11","key":"1007_CR75","first-page":"1738","volume":"23","author":"S Xiang","year":"2012","unstructured":"Xiang, S., Nie, F., Meng, G., Pan, C., & Zhang, C. (2012). Discriminative least squares regression for multiclass classification and feature selection. TNNLS, 23(11), 1738\u20131754.","journal-title":"TNNLS"},{"key":"1007_CR76","unstructured":"Xie, G. S., Zhang, X. Y., & Liu, C. L. (2014). Efficient feature coding based on auto-encoder network for image classificatio. In ACCV."},{"key":"1007_CR77","doi-asserted-by":"crossref","unstructured":"Xie, G. S., Zhang, X. Y., Shu, X., Yan, S., & Liu, C. L. (2015). Task-driven feature pooling for image classification. In 2015 IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2015.140"},{"key":"1007_CR78","doi-asserted-by":"crossref","unstructured":"Xie, N., Ling, H., Hu, W., & Zhang, X. (2010). Use bin-ratio information for category and scene classification. In CVPR.","DOI":"10.1109\/CVPR.2010.5539917"},{"key":"1007_CR79","unstructured":"Xu, Z., Yang, Y., & Hauptmann, A. G. (2014). A discriminative CNN video representation for event detection. ArXiv preprint arXiv:1411.4006 ."},{"issue":"1","key":"1007_CR80","doi-asserted-by":"crossref","first-page":"40","DOI":"10.1109\/TPAMI.2007.250598","volume":"29","author":"S Yan","year":"2007","unstructured":"Yan, S., Xu, D., Zhang, B., Zhang, H. J., Yang, Q., & Lin, S. (2007). Graph embedding and extensions: A general framework for dimensionality reduction. IEEE Transactions on Pattern Analysis and Machine Intelligence, 29(1), 40\u201351.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1007_CR81","unstructured":"Yang, H., Zhou, J. T., Zhang, Y., Gao, B., Wu, J., & Cai, J. (2015). Can partial strong labels boost multi-label object recognition? CoRR arXiv:1504.05843 ."},{"issue":"8","key":"1007_CR82","doi-asserted-by":"crossref","first-page":"3467","DOI":"10.1109\/TIP.2012.2192127","volume":"21","author":"J Yang","year":"2012","unstructured":"Yang, J., Wang, Z., Lin, Z., Cohen, S., & Huang, T. (2012). Coupled dictionary training for image super-resolution. IEEE Transactions on Image Processing, 21(8), 3467\u20133478.","journal-title":"IEEE Transactions on Image Processing"},{"key":"1007_CR83","unstructured":"Yang, J., Yu, K., Gong, Y., & Huang, T. (2009). Linear spatial pyramid matching using sparse coding for image classification. In CVPR."},{"issue":"3","key":"1007_CR84","doi-asserted-by":"crossref","first-page":"209","DOI":"10.1007\/s11263-014-0722-8","volume":"109","author":"M Yang","year":"2014","unstructured":"Yang, M., Zhang, L., Feng, X., & Zhang, D. (2014). Sparse representation based fisher discrimination dictionary learning for image classification. International Journal of Computer Vision, 109(3), 209\u2013232.","journal-title":"International Journal of Computer Vision"},{"key":"1007_CR85","unstructured":"Ye, G., Liu, D., Jhuo, I. H., & Chang, S. F. (2012). Robust late fusion with rank minimization. In CVPR."},{"key":"1007_CR86","unstructured":"Yoo, D., Park, S., Lee, J. Y., & Kweon, I. S. (2015). Fisher kernel for deep neural activations. In CVPRW."},{"key":"1007_CR87","doi-asserted-by":"crossref","unstructured":"Zeiler, M. D., & Fergus, R. (2014). Visualizing and understanding convolutional networks. In ECCV.","DOI":"10.1007\/978-3-319-10590-1_53"},{"key":"1007_CR88","unstructured":"Zhang, L., Yang, M., & Feng, X. (2011). Sparse representation or collaborative representation: Which helps face recognition? In 2011 International conference on computer vision (pp. 471\u2013478). IEEE."},{"key":"1007_CR89","doi-asserted-by":"crossref","unstructured":"Zhang, N., Donahue, J., Girshick, R., & Darrell, T. (2014). Part-based R-CNNs for fine-grained category detection. In Computer vision\u2014ECCV 2014 (pp. 834\u2013849).","DOI":"10.1007\/978-3-319-10590-1_54"},{"key":"1007_CR90","doi-asserted-by":"crossref","unstructured":"Zhang, N., Farrell, R., Iandola, F., & Darrell, T. (2013). Deformable part descriptors for fine-grained recognition and attribute prediction. In 2013 IEEE international conference on computer vision (ICCV) (pp. 729\u2013736).","DOI":"10.1109\/ICCV.2013.96"},{"key":"1007_CR91","doi-asserted-by":"publisher","unstructured":"Zhang, Z., Lai, Z., Xu, Y., Shao, L., j. Wu, & Xie, G. S. (2017). Discriminative elastic-net regularized linear regression. IEEE Transactions on Image Processing. doi: 10.1109\/TIP.2017.2651396 .","DOI":"10.1109\/TIP.2017.2651396"},{"key":"1007_CR92","doi-asserted-by":"crossref","first-page":"490","DOI":"10.1109\/ACCESS.2015.2430359","volume":"3","author":"Z Zhang","year":"2015","unstructured":"Zhang, Z., Xu, Y., Yang, J., Li, X., & Zhang, D. (2015). A survey of sparse representation: Algorithms and applications. IEEE Access, 3, 490\u2013530.","journal-title":"IEEE Access"},{"key":"1007_CR93","doi-asserted-by":"crossref","unstructured":"Zheng, Y., Jiang, Y. G., & Xue, X. (2012). Learning hybrid part filters for scene recognition. In ECCV (pp. 172\u2013185).","DOI":"10.1007\/978-3-642-33715-4_13"},{"key":"1007_CR94","unstructured":"Zhou, B., Lapedriza, A., Xiao, J., Torralba, A., & Oliva, A. (2014). Learning deep features for scene recognition using places database. In NIPS."},{"key":"1007_CR95","doi-asserted-by":"crossref","unstructured":"Zhou, X., Yu, K., Zhang, T., & Huang, T. S. (2010). Image classification using super-vector coding of local image descriptors. In ECCV.","DOI":"10.1007\/978-3-642-15555-0_11"},{"key":"1007_CR96","unstructured":"Zhu, J., Li, L. J., Fei-Fei, L., & Xing, E. P. (2010). Large margin learning of upstream scene understanding models. In NIPS (pp. 2586\u20132594)."},{"issue":"2","key":"1007_CR97","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1111\/j.1467-9868.2005.00503.x","volume":"67","author":"H Zou","year":"2005","unstructured":"Zou, H., & Hastie, T. (2005). Regularization and variable selection via the elastic net. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 67(2), 301\u2013320.","journal-title":"Journal of the Royal Statistical Society: Series B (Statistical Methodology)"},{"key":"1007_CR98","doi-asserted-by":"crossref","unstructured":"Zuo, Z., Wang, G., Shuai, B., Zhao, L., Yang, Q., & Jiang, X. (2014). Learning discriminative and shareable features for scene classification. In Computer vision\u2014ECCV 2014 (pp. 552\u2013568).","DOI":"10.1007\/978-3-319-10590-1_36"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-017-1007-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-017-1007-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-017-1007-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,5,17]],"date-time":"2020-05-17T19:15:18Z","timestamp":1589742918000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-017-1007-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,3,30]]},"references-count":98,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2017,9]]}},"alternative-id":["1007"],"URL":"https:\/\/doi.org\/10.1007\/s11263-017-1007-9","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"type":"print","value":"0920-5691"},{"type":"electronic","value":"1573-1405"}],"subject":[],"published":{"date-parts":[[2017,3,30]]}}}