{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,26]],"date-time":"2025-11-26T16:25:06Z","timestamp":1764174306826},"reference-count":100,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2016,8,29]],"date-time":"2016-08-29T00:00:00Z","timestamp":1472428800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2017,2]]},"DOI":"10.1007\/s11263-016-0945-y","type":"journal-article","created":{"date-parts":[[2016,8,29]],"date-time":"2016-08-29T14:33:08Z","timestamp":1472481188000},"page":"344-364","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":38,"title":["Mining Mid-level Visual Patterns with Deep CNN Activations"],"prefix":"10.1007","volume":"121","author":[{"given":"Yao","family":"Li","sequence":"first","affiliation":[]},{"given":"Lingqiao","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Chunhua","family":"Shen","sequence":"additional","affiliation":[]},{"given":"Anton van den","family":"Hengel","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,8,29]]},"reference":[{"issue":"1","key":"945_CR1","doi-asserted-by":"crossref","first-page":"15","DOI":"10.1007\/s11263-007-0072-x","volume":"78","author":"A Agarwal","year":"2008","unstructured":"Agarwal, A., & Triggs, B. (2008). Multilevel image coding with hyperfeatures. International Journal of Computer Vision, 78(1), 15\u201327.","journal-title":"International Journal of Computer Vision"},{"key":"945_CR2","doi-asserted-by":"crossref","unstructured":"Agrawal, P., Girshick, R., & Malik, J. (2014). Analyzing the performance of multilayer neural networks for object recognition. In Proceedings European Conference on Computer Vision, (pp. 329\u2013344).","DOI":"10.1007\/978-3-319-10584-0_22"},{"key":"945_CR3","unstructured":"Agrawal, R., & Srikant, R. (1994). Fast algorithms for mining association rules in large databases. In Proceedings International Conference Very Large Databases, (pp. 487\u2013499)."},{"key":"945_CR4","doi-asserted-by":"crossref","unstructured":"Aubry, M., Maturana, D., Efros, A. A., Russell, B. C., Sivic, J. (2014a) Seeing 3d chairs: exemplar part-based 2d-3d alignment using a large dataset of cad models. In Proceedings of IEEE Conference on Computer Vision Pattern Recognition, (pp. 3762\u20133769).","DOI":"10.1109\/CVPR.2014.487"},{"key":"945_CR5","doi-asserted-by":"crossref","unstructured":"Aubry, M., Russell, B. C., & Sivic, J. (2014b). Painting-to-3d model alignment via discriminative visual elements. In Proceedings Annual ACM SIGIR Conference, 33(2), p. 14.","DOI":"10.1145\/2591009"},{"key":"945_CR6","doi-asserted-by":"crossref","unstructured":"Azizpour, H., Razavian, A. S., Sullivan, J., Maki, A., & Carlsson, S. (2016). Factors of transferability for a generic convnet representation. IEEE Transactions Pattern Analysis and Machine Intelligence, 38(9),1790\u20131802.","DOI":"10.1109\/TPAMI.2015.2500224"},{"key":"945_CR7","unstructured":"Bansal, A., Shrivastava, A., Doersch, C., & Gupta, A. (2015). Mid-level elements for object detection. arXiv preprint arXiv:1504.07284"},{"issue":"6","key":"945_CR8","first-page":"437","volume":"2","author":"C Borgelt","year":"2012","unstructured":"Borgelt, C. (2012). Frequent item set mining. Wiley Interdisc Review: Data Mining and Knowledge Discovery, 2(6), 437\u2013456.","journal-title":"Wiley Interdisc Review: Data Mining and Knowledge Discovery"},{"key":"945_CR9","doi-asserted-by":"crossref","unstructured":"Bossard, L., Guillaumin, M., & Gool, L. V. (2014). Food-101 mining discriminative components with random forests. In Proceedings European Conference on Computer Vision, (pp. 446\u2013461).","DOI":"10.1007\/978-3-319-10599-4_29"},{"key":"945_CR10","doi-asserted-by":"crossref","unstructured":"Bourdev, L. D., & Malik, J. (2009). Poselets: Body part detectors trained using 3d human pose annotations. In Proceedings IEEE International Conference on Computer Vision, (pp. 1365\u20131372).","DOI":"10.1109\/ICCV.2009.5459303"},{"key":"945_CR11","doi-asserted-by":"crossref","unstructured":"Bourdev, L. D., Maji, S., Brox, T., & Malik, J. (2010). Detecting people using mutually consistent poselet activations. In Proceeding European Conference on Computer Vision, (pp. 168\u2013181).","DOI":"10.1007\/978-3-642-15567-3_13"},{"key":"945_CR12","doi-asserted-by":"crossref","unstructured":"Bourdev, L. D., Maji, S., & Malik, J. (2011). Describing people: A poselet-based approach to attribute classification. In Proceedings IEEE International Conference on Computer Vision, (pp. 1543\u20131550).","DOI":"10.1109\/ICCV.2011.6126413"},{"key":"945_CR13","doi-asserted-by":"crossref","unstructured":"Boureau, Y., Bach, F. R., LeCun, Y., & Ponce, J. (2010). Learning mid-level features for recognition. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (pp. 2559\u20132566).","DOI":"10.1109\/CVPR.2010.5539963"},{"key":"945_CR14","doi-asserted-by":"crossref","unstructured":"Chatfield, K., Simonyan, K., Vedaldi, A., & Zisserman, A. (2014). Return of the devil in the details: Delving deep into convolutional nets. In Proceedings British Machine Vision Conference.","DOI":"10.5244\/C.28.6"},{"key":"945_CR15","doi-asserted-by":"crossref","unstructured":"Cheng, H., Yan, X., Han, J., & Yu, P. S. (2008). Direct discriminative pattern mining for effective classification. In Proceedings IEEE International Conference on Data Engineering, (pp. 169\u2013178).","DOI":"10.1109\/ICDE.2008.4497425"},{"issue":"2","key":"945_CR16","doi-asserted-by":"crossref","first-page":"240","DOI":"10.1109\/TPAMI.2011.119","volume":"34","author":"MJ Choi","year":"2012","unstructured":"Choi, M. J., Torralba, A., & Willsky, A. S. (2012). A tree-based context model for object recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence, 34(2), 240\u2013252.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"945_CR17","doi-asserted-by":"crossref","unstructured":"Cimpoi, M., Maji, S., & Vedaldi, A. (2015). Deep filter banks for texture recognition and segmentation. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 3828\u20133836).","DOI":"10.1109\/CVPR.2015.7299007"},{"issue":"1","key":"945_CR18","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1007\/s11263-015-0872-3","volume":"118","author":"M Cimpoi","year":"2016","unstructured":"Cimpoi, M., Maji, S., Kokkinos, I., & Vedaldi, A. (2016). Deep filter banks for texture recognition, description, and segmentation. International Journal of Computer Vision, 118(1), 65\u201394.","journal-title":"International Journal of Computer Vision"},{"key":"945_CR19","unstructured":"Courbariaux, M., & Bengio, Y. (2016). Binarynet: Training deep neural networks with weights and activations constrained to +1 or -1. arXiv preprint arXiv:1602.02830"},{"key":"945_CR20","doi-asserted-by":"crossref","unstructured":"Crowley, E., & Zisserman, A. (2014). The state of the art: Object retrieval in paintings using discriminative regions. In Proceedings British Machine Vision Conference.","DOI":"10.5244\/C.28.38"},{"key":"945_CR21","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L. J., Li, K., & Li, F. F. (2009). Imagenet: A large-scale hierarchical image database. In Proceedings IEEE Conference on Computer Vision and Pattern Recognition, (pp. 248\u2013255).","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"945_CR22","doi-asserted-by":"crossref","unstructured":"Diba, A., Pazandeh, A. M., Pirsiavash, H., & Gool, L. V. (2016). Deepcamp: Deep convolutional action & attribute mid-level patterns. In Proceedings IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2016.387"},{"key":"945_CR23","doi-asserted-by":"crossref","unstructured":"Divvala, S. K., Hoiem, D., Hays, J., Efros, A. A., Hebert, M. (2009). An empirical study of context in object detection. In Proceedings IEEE Conference on Computer Vision and Pattern Recognition, (pp. 1271\u20131278).","DOI":"10.1109\/CVPR.2009.5206532"},{"key":"945_CR24","doi-asserted-by":"crossref","unstructured":"Doersch, C., Singh, S., Gupta, A., Sivic, J., & Efros, A. A. (2012). What makes paris look like paris? In Proceedings Annual International ACM SIGIR Conference, 31(4), p. 101.","DOI":"10.1145\/2185520.2185597"},{"key":"945_CR25","unstructured":"Doersch, C., Gupta, A., & Efros, A. A. (2013). Mid-level visual element discovery as discriminative mode seeking. In Proceedings Advances in Neural Information Processing Systems, (pp. 494\u2013502)."},{"key":"945_CR26","doi-asserted-by":"crossref","unstructured":"Dosovitskiy, A., & Brox, T. (2016). Inverting visual representations with convolutional networks. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2016.522"},{"key":"945_CR27","doi-asserted-by":"crossref","unstructured":"Endres, I., Shih, K. J., Jiaa, J., & Hoiem, D. (2013). Learning collections of part models for object recognition. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 939\u2013946).","DOI":"10.1109\/CVPR.2013.126"},{"issue":"2","key":"945_CR28","doi-asserted-by":"crossref","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Gool, L. J. V., Williams, C. K. I., Winn, J. M., & Zisserman, A. (2010). The pascal visual object classes (VOC) challenge. International Journal of Computer Vision, 88(2), 303\u2013338.","journal-title":"International Journal of Computer Vision"},{"issue":"1","key":"945_CR29","doi-asserted-by":"crossref","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham, M., Eslami, S. M. A., Gool, L. V., Williams, C. K. I., Winn, J. M., & Zisserman, A. (2015). The pascal visual object classes challenge: A retrospective. International Journal of Computer Vision, 111(1), 98\u2013136.","journal-title":"International Journal of Computer Vision"},{"key":"945_CR30","first-page":"1871","volume":"9","author":"RE Fan","year":"2008","unstructured":"Fan, R. E., Chang, K. W., Hsieh, C. J., Wang, X. R., & Lin, C. J. (2008). Liblinear: A library for large linear classification. Journal of Machine Learning Research, 9, 1871\u20131874.","journal-title":"Journal of Machine Learning Research"},{"issue":"9","key":"945_CR31","doi-asserted-by":"crossref","first-page":"1627","DOI":"10.1109\/TPAMI.2009.167","volume":"32","author":"PF Felzenszwalb","year":"2010","unstructured":"Felzenszwalb, P. F., Girshick, R. B., McAllester, D. A., & Ramanan, D. (2010). Object detection with discriminatively trained part-based models. IEEE Transactions on Pattern Analysis and Machine Intelligence, 32(9), 1627\u20131645.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"945_CR32","doi-asserted-by":"crossref","unstructured":"Fernando, B., & Tuytelaars, T. (2013). Mining multiple queries for image retrieval: On-the-fly learning of an object-specific mid-level representation. In Proceedings of IEEE International Conference on Computer Vision, (pp. 2544\u20132551).","DOI":"10.1109\/ICCV.2013.316"},{"key":"945_CR33","doi-asserted-by":"crossref","unstructured":"Fernando, B., Fromont, \u00c9., & Tuytelaars, T. (2012). Effective use of frequent itemset mining for image classification. In Proceedings of European Conference on Computer Vision, (pp. 214\u2013227).","DOI":"10.1007\/978-3-642-33718-5_16"},{"issue":"3","key":"945_CR34","doi-asserted-by":"crossref","first-page":"186","DOI":"10.1007\/s11263-014-0700-1","volume":"108","author":"B Fernando","year":"2014","unstructured":"Fernando, B., Fromont, \u00c9., & Tuytelaars, T. (2014). Mining mid-level features for image classification. International Journal of Computer Vision, 108(3), 186\u2013203.","journal-title":"International Journal of Computer Vision"},{"key":"945_CR35","doi-asserted-by":"crossref","unstructured":"Fouhey, D. F., Gupta, A., & Hebert, M. (2013). Data-driven 3d primitives for single image understanding. In Proceedings of IEEE International Conference on Computer Vision, (pp. 3392\u20133399).","DOI":"10.1109\/ICCV.2013.421"},{"key":"945_CR36","doi-asserted-by":"crossref","unstructured":"Fouhey, D. F., Hussain, W., Gupta, A., & Hebert, M. (2015). Single image 3d without a single 3d image. In Proceedings of IEEE International Conference on Computer Vision, (pp. 1053\u20131061).","DOI":"10.1109\/ICCV.2015.126"},{"key":"945_CR37","unstructured":"Gao, Y., Beijbom, O., Zhang, N., & Darrell, T. (2010). Compact bilinear pooling. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (pp. 317\u2013326)."},{"key":"945_CR38","unstructured":"Gilbert, A., & Bowden, R. (2014). Data mining for action recognition. In Proceedings of Asian Conference on Computer Vision, (pp. 290\u2013303)."},{"issue":"5","key":"945_CR39","doi-asserted-by":"crossref","first-page":"883","DOI":"10.1109\/TPAMI.2010.144","volume":"33","author":"A Gilbert","year":"2011","unstructured":"Gilbert, A., Illingworth, J., & Bowden, R. (2011). Action recognition using mined hierarchical compound features. IEEE Transactions on Pattern Analysis and Machine Intelligence, 33(5), 883\u2013897.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"945_CR40","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., & Malik, J. (2014). Rich feature hierarchies for accurate object detection and semantic segmentation. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 580\u2013587).","DOI":"10.1109\/CVPR.2014.81"},{"issue":"1","key":"945_CR41","doi-asserted-by":"crossref","first-page":"142","DOI":"10.1109\/TPAMI.2015.2437384","volume":"38","author":"RB Girshick","year":"2016","unstructured":"Girshick, R. B., Donahue, J., Darrell, T., & Malik, J. (2016). Region-based convolutional networks for accurate object detection and segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 38(1), 142\u2013158.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"945_CR42","doi-asserted-by":"crossref","unstructured":"Gong, Y., Wang, L., Guo, R., & Lazebnik, S. (2014). Multi-scale orderless pooling of deep convolutional activation features. In Proceedings of European Conference on Computer Vision, (pp. 392\u2013407).","DOI":"10.1007\/978-3-319-10584-0_26"},{"issue":"10","key":"945_CR43","doi-asserted-by":"crossref","first-page":"1347","DOI":"10.1109\/TKDE.2005.166","volume":"17","author":"G Grahne","year":"2005","unstructured":"Grahne, G., & Zhu, J. (2005). Fast algorithms for frequent itemset mining using fp-trees. IEEE Transactions on Knowledge and Data Engineering, 17(10), 1347\u20131362.","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"945_CR44","doi-asserted-by":"crossref","unstructured":"Hariharan, B., Malik, J., & Ramanan, D. (2012). Discriminative decorrelation for clustering and classification. In Proceedings of European Conference on Computer Vision, (pp. 459\u2013472).","DOI":"10.1007\/978-3-642-33765-9_33"},{"issue":"9","key":"945_CR45","doi-asserted-by":"crossref","first-page":"1904","DOI":"10.1109\/TPAMI.2015.2389824","volume":"37","author":"K He","year":"2015","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2015). Spatial pyramid pooling in deep convolutional networks for visual recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence, 37(9), 1904\u20131916.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"1","key":"945_CR46","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1007\/s11263-008-0137-5","volume":"80","author":"D Hoiem","year":"2008","unstructured":"Hoiem, D., Efros, A. A., & Hebert, M. (2008). Putting objects in perspective. International Journal of Computer Vision, 80(1), 3\u201315.","journal-title":"International Journal of Computer Vision"},{"key":"945_CR47","doi-asserted-by":"crossref","unstructured":"Jain, A., Gupta, A., Rodriguez, M., & Davis, L. S. (2013). Representing videos using mid-level discriminative patches. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 2571\u20132578).","DOI":"10.1109\/CVPR.2013.332"},{"key":"945_CR48","doi-asserted-by":"crossref","unstructured":"Jegou, H., Douze, M., Schmid, C., & P\u00e9rez, P. (2010). Aggregating local descriptors into a compact image representation. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 3304\u20133311).","DOI":"10.1109\/CVPR.2010.5540039"},{"key":"945_CR49","doi-asserted-by":"crossref","unstructured":"Jia, Y., Shelhamer, E., Donahue, J., Karayev, S., Long, J., Girshick, R., Guadarrama, S., & Darrell, T. (2014). Caffe: Convolutional architecture for fast feature embedding. arXiv preprint arXiv:1408.5093","DOI":"10.1145\/2647868.2654889"},{"key":"945_CR50","doi-asserted-by":"crossref","unstructured":"Juneja, M., Vedaldi, A., Jawahar, C. V., & Zisserman, A. (2013). Blocks that shout: Distinctive parts for scene classification. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 923\u2013930).","DOI":"10.1109\/CVPR.2013.124"},{"key":"945_CR51","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. In Proceedings of Advances Neural Information Processing Systems, (pp. 1106\u20131114)."},{"key":"945_CR52","doi-asserted-by":"crossref","unstructured":"Lazebnik, S., Schmid, C., & Ponce, J. (2006). Beyond bags of features: Spatial pyramid matching for recognizing natural scene categories. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 2169\u20132178).","DOI":"10.1109\/CVPR.2006.68"},{"key":"945_CR53","doi-asserted-by":"crossref","unstructured":"Lee, Y. J., Efros, A. A., & Hebert, M. (2013). Style-aware mid-level representation for discovering visual connections in space and time. In Proceedings of IEEE International Conference on Computer Vision, (pp. 1857\u20131864).","DOI":"10.1109\/ICCV.2013.233"},{"key":"945_CR54","doi-asserted-by":"crossref","unstructured":"Li, Q., Wu, J., & Tu, Z. (2013). Harvesting mid-level visual concepts from large-scale internet images. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 851\u2013858).","DOI":"10.1109\/CVPR.2013.115"},{"key":"945_CR55","doi-asserted-by":"crossref","unstructured":"Li, Y., Liu, L., Shen, C., & van\u00a0den Hengel, A. (2015). Mid-level deep pattern mining. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 971\u2013980).","DOI":"10.1109\/CVPR.2015.7298699"},{"key":"945_CR56","doi-asserted-by":"crossref","unstructured":"Lin, T., RoyChowdhury, A., & Maji, S. (2015). Bilinear CNN models for fine-grained visual recognition. In Proceedings of European Conference on Computer Vision, (pp. 1449\u20131457).","DOI":"10.1109\/ICCV.2015.170"},{"key":"945_CR57","unstructured":"Liu, L., & Wang, L. (2012). What has my classifier learned? visualizing the classification rules of bag-of-feature model by support region detection. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 3586\u20133593)."},{"key":"945_CR58","unstructured":"Liu, L., Shen, C., Wang, L., van\u00a0den Hengel, A., & Wang, C. (2014). Encoding high dimensional local features by sparse coding based fisher vectors. In Proceedings of Advances Neural Information Processing Systems, (pp. 1143\u20131151)."},{"key":"945_CR59","doi-asserted-by":"crossref","unstructured":"Liu, L., Shen, C., & van\u00a0den Hengel, A. (2015). The treasure beneath convolutional layers: Cross convolutional layer pooling for image classification. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 4749\u20134757).","DOI":"10.1109\/CVPR.2015.7299107"},{"key":"945_CR60","unstructured":"Malisiewicz, T., & Efros, A. A. (2009). Beyond categories: The visual memex model for reasoning about object relationships. In Proceedings of Advances Neural Information Processing Systems, (pp. 1222\u20131230)."},{"key":"945_CR61","doi-asserted-by":"crossref","unstructured":"Malisiewicz, T., Gupta, A., & Efros, A. A. (2011). Ensemble of exemplar-svms for object detection and beyond. In Proceedings of IEEE International Conference on Computer Vision, (pp. 89\u201396).","DOI":"10.1109\/ICCV.2011.6126229"},{"key":"945_CR62","doi-asserted-by":"crossref","unstructured":"Matzen, K., & Snavely, N. (2015). Bubblenet: Foveated imaging for visual discovery. In Proceedings of IEEE International Conference on Computer Vision, (pp. 1931\u20131939).","DOI":"10.1109\/ICCV.2015.224"},{"key":"945_CR63","doi-asserted-by":"crossref","unstructured":"Mettes, P., van Gemert, J. C., & Snoek, C. G. M. (2016). No spare parts: Sharing part detectors for image categorization. Computer Vision Image Understanding","DOI":"10.1016\/j.cviu.2016.07.008"},{"key":"945_CR64","doi-asserted-by":"crossref","unstructured":"Oquab, M., Bottou, L., Laptev, I., & Sivic, J. (2014). Learning and transferring mid-level image representations using convolutional neural networks. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 1717\u20131724).","DOI":"10.1109\/CVPR.2014.222"},{"key":"945_CR65","unstructured":"Oramas, J., & Tuytelaars, T. (2016). Modeling visual compatibility through hierarchical mid-level elements. arXiv preprint arXiv:1604.00036"},{"key":"945_CR66","doi-asserted-by":"crossref","unstructured":"Owens, A., Xiao, J., Torralba, A., & Freeman, W. T. (2013). Shape anchors for data-driven multi-view reconstruction. In Proceedings of IEEE International Conference on Computer Vision, (pp. 33\u201340).","DOI":"10.1109\/ICCV.2013.461"},{"key":"945_CR67","unstructured":"Parizi, S. N., Vedaldi, A., Zisserman, A., & Felzenszwalb, P. (2015). Automatic discovery and optimization of parts for image classification. In Proceedings International Conference on Learning Representations."},{"key":"945_CR68","doi-asserted-by":"crossref","unstructured":"Perronnin, F., Liu, Y., S\u00e1nchez, J., Poirier, H. (2010a) Large-scale image retrieval with compressed fisher vectors. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 3384\u20133391).","DOI":"10.1109\/CVPR.2010.5540009"},{"key":"945_CR69","doi-asserted-by":"crossref","unstructured":"Perronnin, F., S\u00e1nchez, J., Mensink, T. (2010b) Improving the fisher kernel for large-scale image classification. In Proceedings of European Conference on Computer Vision, (pp. 143\u2013156).","DOI":"10.1007\/978-3-642-15561-1_11"},{"key":"945_CR70","doi-asserted-by":"crossref","unstructured":"Quack, T., Ferrari, V., Leibe, B., & Gool, L. J. V. (2007). Efficient mining of frequent and distinctive feature configurations. In Proceedings of IEEE International Conference on Computer Vision, (pp. 1\u20138).","DOI":"10.1109\/ICCV.2007.4408906"},{"key":"945_CR71","doi-asserted-by":"crossref","unstructured":"Quattoni, A., & Torralba, A. (2009). Recognizing indoor scenes. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 413\u2013420).","DOI":"10.1109\/CVPR.2009.5206537"},{"key":"945_CR72","unstructured":"Rastegari, M., Ordonez, V., Redmon, J., & Farhadi, A. (2016). In Proceedings of European Conference on Computer Vision."},{"key":"945_CR73","doi-asserted-by":"crossref","unstructured":"Razavian, A. S., Azizpour, H., Sullivan, J., & Carlsson, S. (2014). Cnn features off-the-shelf: An astounding baseline for recognition. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, (pp. 512\u2013519).","DOI":"10.1109\/CVPRW.2014.131"},{"key":"945_CR74","doi-asserted-by":"crossref","unstructured":"Rematas, K., Fernando, B., Dellaert, F., & Tuytelaars, T. (2015). Dataset fingerprints: Exploring image collections through data mining. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 4867\u20134875).","DOI":"10.1109\/CVPR.2015.7299120"},{"issue":"3","key":"945_CR75","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., Deng, J., Su, H., Krause, J., Satheesh, S., Ma, S., et al. (2015). Imagenet large scale visual recognition challenge. International Journal of Computer Vision, 115(3), 211\u2013252.","journal-title":"International Journal of Computer Vision"},{"issue":"8","key":"945_CR76","doi-asserted-by":"crossref","first-page":"1571","DOI":"10.1109\/TPAMI.2014.2366122","volume":"37","author":"KJ Shih","year":"2015","unstructured":"Shih, K. J., Endres, I., & Hoiem, D. (2015). Learning discriminative collections of part detectors for object recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence, 37(8), 1571\u20131584.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"945_CR77","doi-asserted-by":"crossref","unstructured":"Shrivastava, A., Malisiewicz, T., Gupta, A., & Efros, A. A. (2011). Data-driven visual similarity for cross-domain image matching. Proceedings of Annual ACM SIGIR Conference, 30(6), p. 154.","DOI":"10.1145\/2024156.2024188"},{"key":"945_CR78","unstructured":"Simonyan, K., & Zisserman, A. (2015). Very deep convolutional networks for large-scale image recognition. In Proceedings International Conference on Learning Representations."},{"key":"945_CR79","unstructured":"Simonyan, K., Vedaldi, A., & Zisserman, A. (2013). Deep fisher networks for large-scale image classification. In Proceedings of Advances Neural Information Processing Systems, (pp. 163\u2013171)."},{"key":"945_CR80","doi-asserted-by":"crossref","unstructured":"Singh, S., Gupta, A., & Efros, A. A. (2012). Unsupervised discovery of mid-level discriminative patches. In Proceedings of European Conference on Computer Vision, (pp. 73\u201386).","DOI":"10.1007\/978-3-642-33709-3_6"},{"key":"945_CR81","doi-asserted-by":"crossref","unstructured":"Sivic, J., & Zisserman, A. (2003). Video google: A text retrieval approach to object matching in videos. In Proceedings of IEEE International Conference on Computer Vision, (pp. 1470\u20131477).","DOI":"10.1109\/ICCV.2003.1238663"},{"key":"945_CR82","unstructured":"Song, H. O., Lee, Y. J., Jegelka, S., & Darrell, T. (2014). Weakly-supervised discovery of visual pattern configurations. In Proceedings of Advances Neural Information Processing Systems, (pp. 1637\u20131645)."},{"key":"945_CR83","doi-asserted-by":"crossref","unstructured":"Sun, J., & Ponce, J. (2013). Learning discriminative part detectors for image classification and cosegmentation. In Proceedings of IEEE International Conference on Computer Vision, (pp. 3400\u20133407).","DOI":"10.1109\/ICCV.2013.422"},{"key":"945_CR84","first-page":"1","volume":"2","author":"J Sun","year":"2016","unstructured":"Sun, J., & Ponce, J. (2016). Learning dictionary of discriminative part detectors for image categorization and cosegmentation. International Journal of Computer Vision, 2, 1\u201323.","journal-title":"International Journal of Computer Vision"},{"issue":"2","key":"945_CR85","doi-asserted-by":"crossref","first-page":"169","DOI":"10.1023\/A:1023052124951","volume":"53","author":"A Torralba","year":"2003","unstructured":"Torralba, A. (2003). Contextual priming for object detection. International Journal of Computer Vision, 53(2), 169\u2013191.","journal-title":"International Journal of Computer Vision"},{"key":"945_CR86","unstructured":"Uno, T., Asai, T., Uchida, Y., & Arimura, H. (2003). LCM: An efficient algorithm for enumerating frequent closed item sets. In Proceedings of the Workshop on Frequent Itemset Mining Implementations, International Conference on Data Mining."},{"key":"945_CR87","doi-asserted-by":"crossref","unstructured":"Voravuthikunchai, W., Cr\u00e9milleux, B., & Jurie, F. (2014). Histograms of pattern sets for image classification and object recognition. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 224\u2013231).","DOI":"10.1109\/CVPR.2014.36"},{"issue":"1","key":"945_CR88","doi-asserted-by":"crossref","first-page":"169","DOI":"10.1007\/s10618-010-0202-x","volume":"23","author":"J Vreeken","year":"2011","unstructured":"Vreeken, J., van Leeuwen, M., & Siebes, A. (2011). Krimp: mining itemsets that compress. Data Mining and Knowledge Discovery, 23(1), 169\u2013214.","journal-title":"Data Mining and Knowledge Discovery"},{"issue":"5","key":"945_CR89","doi-asserted-by":"crossref","first-page":"914","DOI":"10.1109\/TPAMI.2013.198","volume":"36","author":"J Wang","year":"2014","unstructured":"Wang, J., Liu, Z., Wu, Y., & Yuan, J. (2014). Learning actionlet ensemble for 3d human action recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence, 36(5), 914\u2013927.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"945_CR90","doi-asserted-by":"crossref","unstructured":"Wang, J., Yang, Y., Mao, J., Huang, Z., & Xu, C. H. W. (2016a). Cnn-rnn: A unified framework for multi-label image classification. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2016.251"},{"key":"945_CR91","doi-asserted-by":"crossref","unstructured":"Wang, L., Qiao, Y., Tang, X. (2013a) Motionlets: Mid-level 3d parts for human motion recognition. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 2674\u20132681).","DOI":"10.1109\/CVPR.2013.345"},{"key":"945_CR92","unstructured":"Wang, X., Wang, B., Bai, X., Liu, W., Tu, Z. (2013b) Max-margin multiple-instance dictionary learning. In Proceedings International Conference on Machine Learning, (pp. 846\u2013854)."},{"key":"945_CR93","doi-asserted-by":"crossref","unstructured":"Wang, Y., Choi, J., Morariu, V. I., & Davis, L. S. (2016b). Mining discriminative triplets of patches for fine-grained classification. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (pp. 1163\u20131172).","DOI":"10.1109\/CVPR.2016.131"},{"key":"945_CR94","unstructured":"Wei, Y., Xia, W., Huang, J., Ni, B., Dong, J., Zhao, Y., Yan, S. (2014). CNN: single-label to multi-label. CoRR arXiv:1406.5726"},{"key":"945_CR95","doi-asserted-by":"crossref","unstructured":"Yao, B., & Fei-Fei, L. (2010). Grouplet: A structured image representation for recognizing human and object interactions. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 9\u201316).","DOI":"10.1109\/CVPR.2010.5540234"},{"key":"945_CR96","doi-asserted-by":"crossref","unstructured":"Yoo, D., Park, S., Lee, J. Y., & Kweon, I. S. (2015). Multi-scale pyramid pooling for deep convolutional representation. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, (pp. 71\u201380).","DOI":"10.1109\/CVPRW.2015.7301274"},{"key":"945_CR97","doi-asserted-by":"crossref","unstructured":"Yuan, J., Wu, Y., & Yang, M. (2007). Discovery of collocation patterns: from visual words to visual phrases. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2007.383222"},{"key":"945_CR98","doi-asserted-by":"crossref","unstructured":"Zeiler, M. D., & Fergus, R. (2014). Visualizing and understanding convolutional networks. In Proceedings of European Conference on Computer Vision, (pp. 818\u2013833).","DOI":"10.1007\/978-3-319-10590-1_53"},{"key":"945_CR99","doi-asserted-by":"crossref","unstructured":"Zhao, R., Ouyang, W., & Wang, X. (2014). Learning mid-level filters for person re-identification. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, (pp. 144\u2013151).","DOI":"10.1109\/CVPR.2014.26"},{"key":"945_CR100","unstructured":"Zhou, B., Lapedriza \u00c0, Xiao, J., Torralba, A., & Oliva, A. (2014). Learning deep features for scene recognition using places database. In Proceedings of Advances Neural Information Processing Systems, (pp. 487\u2013495)."}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-016-0945-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-016-0945-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-016-0945-y","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-016-0945-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,13]],"date-time":"2019-09-13T01:35:37Z","timestamp":1568338537000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-016-0945-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,8,29]]},"references-count":100,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2017,2]]}},"alternative-id":["945"],"URL":"https:\/\/doi.org\/10.1007\/s11263-016-0945-y","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,8,29]]}}}