{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T16:44:27Z","timestamp":1781541867413,"version":"3.54.5"},"reference-count":54,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2014,6,25]],"date-time":"2014-06-25T00:00:00Z","timestamp":1403654400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2015,1]]},"DOI":"10.1007\/s11263-014-0733-5","type":"journal-article","created":{"date-parts":[[2014,6,24]],"date-time":"2014-06-24T13:35:47Z","timestamp":1403616947000},"page":"98-136","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5200,"title":["The Pascal Visual Object Classes Challenge: A Retrospective"],"prefix":"10.1007","volume":"111","author":[{"given":"Mark","family":"Everingham","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"S. M. Ali","family":"Eslami","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Luc","family":"Van Gool","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Christopher K. I.","family":"Williams","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"John","family":"Winn","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Andrew","family":"Zisserman","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2014,6,25]]},"reference":[{"key":"733_CR1","unstructured":"Alexe, B., Deselaers, T., & Ferrari, V. (2010). 
What is an object? In Proceedings of Conference on Computer Vision and Pattern Recognition (pp. 73\u201380)."},{"key":"733_CR2","doi-asserted-by":"crossref","unstructured":"Alexiou, I., & Bharath, A. (2012). Efficient Kernels couple visual words through categorical opponency. In Proceedings of British Machine Vision Conference.","DOI":"10.5244\/C.26.82"},{"key":"733_CR3","unstructured":"Bertail, P., Cl\u00e9men\u00e7on, S. J., & Vayatis, N. (2009). On bootstrapping the ROC curve. In D. Koller, D. Schuurmans, Y. Bengio, & L. Bottou (Eds.), Advances in Neural Information Processing Systems (Vol. 21, pp. 137\u2013144). Red Hook, NY: Curran Associates, Inc."},{"key":"733_CR4","doi-asserted-by":"crossref","unstructured":"Carreira, J., Caseiro, R., Batista, J., & Sminchisescu, C. (2012). Semantic segmentation with second-order pooling. In Proceedings of European Conference on Computer Vision.","DOI":"10.1007\/978-3-642-33786-4_32"},{"key":"733_CR5","unstructured":"Chang, C. C., & Lin, C. J. (2011). LIBSVM: A library for support vector machines. Transactions on Intelligent Systems and Technology, 2, 27:1\u201327:27. Software available at http:\/\/www.csie.ntu.edu.tw\/~cjlin\/libsvm ."},{"key":"733_CR6","unstructured":"Chen, Q., Song, Z., Hua, Y., Huang, Z., & Yan, S. (2012). Generalized hierarchical matching for image classification. In Proceedings of Conference on Computer Vision and Pattern Recognition."},{"key":"733_CR7","unstructured":"Csurka, G., Dance, C., Fan, L., Williamowski, J., & Bray, C. (2004). Visual categorization with bags of keypoints. In Proceedings of ECCV2004 Workshop on Statistical Learning in Computer Vision (pp. 59\u201374)."},{"key":"733_CR8","doi-asserted-by":"crossref","unstructured":"Dalal, N., & Triggs, B. (2005). Histograms of oriented gradients for human detection. 
In Proceedings of Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2005.177"},{"key":"733_CR9","unstructured":"Donahue, J., Jia, Y., Vinyals, O., Hoffman, J., Zhang, N., Tzeng, E., & Darrell, T. (2013). Decaf: A deep convolutional activation feature for generic visual recognition. CoRR abs\/1310.1531."},{"key":"733_CR10","doi-asserted-by":"crossref","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C. K. I., Winn, J., & Zisserman, A. (2010). The PASCAL visual object classes (VOC) challenge. International Journal of Computer Vision, 88, 303\u2013338.","journal-title":"International Journal of Computer Vision"},{"key":"733_CR11","doi-asserted-by":"crossref","unstructured":"Farhadi, A., Endres, I., Hoiem, D., & Forsyth, D. (2009). Describing objects by their attributes. In Proceedings of Conference on Computer Vision and Pattern Recognition, IEEE (pp. 1778\u2013 1785).","DOI":"10.1109\/CVPR.2009.5206772"},{"issue":"9","key":"733_CR12","doi-asserted-by":"crossref","first-page":"1627","DOI":"10.1109\/TPAMI.2009.167","volume":"32","author":"PF Felzenszwalb","year":"2010","unstructured":"Felzenszwalb, P. F., Girshick, R. B., McAllester, D., & Ramanan, D. (2010). Object detection with discriminatively trained part based models. Transactions on Pattern Analysis and Machine Intelligence, 32(9), 1627\u20131645.","journal-title":"Transactions on Pattern Analysis and Machine Intelligence"},{"key":"733_CR13","unstructured":"Flickr website. (2013). http:\/\/www.flickr.com\/ ."},{"key":"733_CR14","doi-asserted-by":"crossref","unstructured":"Girshick, R. B., Donahue, J., Darrell, T., & Malik, J. (2014). Rich feature hierarchies for accurate object detection and semantic segmentation. 
In Proceedings of Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2014.81"},{"key":"733_CR15","doi-asserted-by":"crossref","first-page":"743","DOI":"10.1093\/biomet\/91.3.743","volume":"91","author":"P Hall","year":"2004","unstructured":"Hall, P., Hyndman, R., & Fan, Y. (2004). Nonparametric confidence intervals for receiver operating characteristic curves. Biometrika, 91, 743\u201350.","journal-title":"Biometrika"},{"key":"733_CR16","unstructured":"Hoai, M., Ladicky, L., & Zisserman, A. (2012). Action Recognition from Still Images by Aligning Body Parts. http:\/\/pascallin.ecs.soton.ac.uk\/challenges\/VOC\/voc2012\/workshop\/segmentation_action_layout.pdf . Slides contained in the presentation by Luc van Gool on Overview and results of the segmentation challenge and action taster."},{"key":"733_CR17","doi-asserted-by":"crossref","unstructured":"Hoiem, D., Chodpathumwan, Y., & Dai, Q. (2012). Diagnosing error in object detectors. In Proceedings of European Conference on Computer Vision.","DOI":"10.1007\/978-3-642-33712-3_25"},{"key":"733_CR18","doi-asserted-by":"crossref","unstructured":"Ion, A., Carreira, J., Sminchisescu, C. (2011a). Image segmentation by figure-ground composition into maximal cliques. In Proceedings of International Conference on Computer Vision.","DOI":"10.1109\/ICCV.2011.6126486"},{"key":"733_CR19","unstructured":"Ion, A., Carreira, J., & Sminchisescu, C. (2011b). Probabilistic joint image segmentation and labeling. In J. Shawe-Taylor, R. S. Zemel, P. L. Bartlett, F. Pereira, & K. Q. Weinberger (Eds.), Advances in Neural Information Processing Systems (Vol. 24, pp. 1827\u20131835). Red Hook, NY: Curran Associates, Inc."},{"key":"733_CR20","doi-asserted-by":"crossref","unstructured":"Karaoglu, S., Van Gemert, J., & Gevers, T. (2012). Object reading: Text recognition for object recognition. 
In Proceedings of ECCV 2012 Workshops and Demonstrations.","DOI":"10.1007\/978-3-642-33885-4_46"},{"key":"733_CR21","unstructured":"Khan, F., Anwer, R., Van de Weijer, J., Bagdanov, A., Vanrell, M., & Lopez, A. M. (2012a). Color attributes for object detection. In Proceedings of Conference on Computer Vision and Pattern Recognition."},{"issue":"1","key":"733_CR22","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1007\/s11263-011-0495-2","volume":"98","author":"F Khan","year":"2012","unstructured":"Khan, F., Van de Weijer, J., & Vanrell, M. (2012b). Modulating shape features by color attention for object recognition. International Journal of Computer Vision, 98(1), 49\u201364.","journal-title":"International Journal of Computer Vision"},{"key":"733_CR23","unstructured":"Khosla, A., Yao, B., & Fei-Fei, L. (2011). Combining randomization and discrimination for fine-grained image categorization. In Proceedings of Conference on Computer Vision and Pattern Recognition."},{"key":"733_CR24","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In F. Pereira, C. J. C. Burges, L. Bottou, & K. Q. Weinberger (Eds.), Advances in Neural Information Processing Systems (Vol. 25, pp. 1106\u20131114). Red Hook, NY: Curran Associates, Inc."},{"key":"733_CR25","doi-asserted-by":"crossref","unstructured":"Lazebnik, S., Schmid, C., & Ponce, J. (2006). Beyond bags of features: Spatial pyramid matching for recognizing natural scene categories. In Proceedings of Conference on Computer Vision and Pattern Recognition (pp 2169\u20132178).","DOI":"10.1109\/CVPR.2006.68"},{"key":"733_CR26","unstructured":"Leibe, B., Leonardis, A., & Schiele, B. (2004). Combined object categorization and segmentation with an implicit shape model. In Proceedings of ECCV Workshop on Statistical Learning in Computer Vision."},{"key":"733_CR27","unstructured":"Lempitsky, V., & Zisserman, A. (2010). 
Learning to count objects in images. In J. D. Lafferty, C. K. I. Williams, J. Shawe-Taylor, R. S. Zemel & A. Culotta (Eds.), Advances in Neural Information Processing Systems (Vol. 23, pp. 1324\u20131332). Red Hook, NY: Curran Associates, Inc. http:\/\/papers.nips.cc\/paper\/4043-learning-to-count-objects-in-images.pdf"},{"key":"733_CR28","doi-asserted-by":"crossref","unstructured":"Li, F., Carreira, J., Lebanon, G., & Sminchisescu, C. (2013). Composite statistical inference for semantic segmentation. In Proceedings of Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2013.424"},{"issue":"2","key":"733_CR29","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe, D. G. (2004). Distinctive image features from scale-invariant keypoints. International Journal of Computer Vision, 60(2), 91\u2013 110.","journal-title":"International Journal of Computer Vision"},{"issue":"4","key":"733_CR30","doi-asserted-by":"crossref","first-page":"2171","DOI":"10.1016\/j.asoc.2012.12.013","volume":"13","author":"L Nanni","year":"2013","unstructured":"Nanni, L., & Lumini, A. (2013). Heterogeneous bag-of-features for object\/scene recognition. Applied Soft Computing, 13(4), 2171\u20132178.","journal-title":"Applied Soft Computing"},{"key":"733_CR31","unstructured":"O\u2019Connor, B. (2010). A response to \u201ccomparing Precision-Recall curves the Bayesian way?\u201d. A comment on the blog post by Bob Carpenter on Comparing Precision-Recall Curves the Bayesian Way? http:\/\/lingpipe-blog.com\/2010\/01\/29\/comparing-precision-recall-curves-bayesian-way\/ ."},{"key":"733_CR32","doi-asserted-by":"crossref","unstructured":"Oquab, M., Bottou, L., Laptev, I., Sivic, J. (2014). Learning and transferring mid-level image representations using convolutional neural networks. 
In Proceedings of Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2014.222"},{"key":"733_CR33","doi-asserted-by":"crossref","unstructured":"Russakovsky, O., Lin, Y., Yu, K., & Fei-Fei, L. (2012). Object-centric spatial pooling for image classification. In Proceedings of European Conference on Computer Vision.","DOI":"10.1007\/978-3-642-33709-3_1"},{"key":"733_CR34","doi-asserted-by":"crossref","unstructured":"Russell, B., Torralba, A., Murphy, K., & Freeman, W. T. (2008). LabelMe: A database and web-based tool for image annotation. International Journal of Computer Vision, 77(1\u20133), 157\u2013173. http:\/\/labelme.csail.mit.edu\/","DOI":"10.1007\/s11263-007-0090-8"},{"key":"733_CR35","volume-title":"Introduction to modern information retrieval","author":"G Salton","year":"1986","unstructured":"Salton, G., & Mcgill, M. J. (1986). Introduction to modern information retrieval. New York, NY: McGraw-Hill Inc."},{"key":"733_CR36","doi-asserted-by":"crossref","unstructured":"Sener, F., Bas, C., Ikizler-Cinbis, N. (2012). On recognizing actions in still images via multiple features. In Proceedings of ECCV Workshop on Action Recognition and Pose Estimation in Still Images.","DOI":"10.1007\/978-3-642-33885-4_27"},{"key":"733_CR37","doi-asserted-by":"crossref","unstructured":"Song, Z., Chen, Q., Huang, Z., Hua, Y., & Yan, S. (2011). Contextualizing object detection and classification. In Proceedings of Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2011.5995330"},{"key":"733_CR38","unstructured":"Pascal VOC 2012 challenge results. (2012). http:\/\/pascallin.ecs.soton.ac.uk\/challenges\/VOC\/voc2012\/results\/index.html ."},{"key":"733_CR39","unstructured":"Pascal VOC annotation guidelines. (2012). http:\/\/pascallin.ecs.soton.ac.uk\/challenges\/VOC\/voc2012\/guidelines.html ."},{"key":"733_CR40","unstructured":"Pascal VOC best practice guidelines. (2012). 
http:\/\/pascallin.ecs.soton.ac.uk\/challenges\/VOC\/#bestpractice ."},{"key":"733_CR41","unstructured":"Pascal VOC evaluation server. (2012) http:\/\/host.robots.ox.ac.uk:8080\/ ."},{"key":"733_CR42","doi-asserted-by":"crossref","unstructured":"Torralba, A., & Efros, A. A. (2011). Unbiased look at dataset bias. In Proceedings of Conference on Computer Vision and Pattern Recognition, IEEE (pp. 1521\u20131528).","DOI":"10.1109\/CVPR.2011.5995347"},{"key":"733_CR43","doi-asserted-by":"crossref","unstructured":"Uijlings, J., Van de Sande, K., Gevers, T., & Smeulders, A. (2013). Selective search for object recognition. International Journal of Computer Vision, 104(2), 154\u2013171.","DOI":"10.1007\/s11263-013-0620-5"},{"key":"733_CR44","doi-asserted-by":"crossref","unstructured":"Van de Sande, K., Uijlings, J., Gevers, T., & Smeulders, A. (2011). Segmentation as selective search for object recognition. In Proceedings of International Conference on Computer Vision.","DOI":"10.1109\/ICCV.2011.6126456"},{"key":"733_CR45","doi-asserted-by":"crossref","unstructured":"Van Gemert, J. (2011). Exploiting photographic style for category-level image classification by generalizing the spatial pyramid. In Proceedings of International Conference on Multimedia Retrieval.","DOI":"10.1145\/1991996.1992010"},{"key":"733_CR46","doi-asserted-by":"crossref","unstructured":"Vedaldi, A., Gulshan, V., Varma, M., & Zisserman, A. (2009). Multiple kernels for object detection. In International Conference on Computer Vision.","DOI":"10.1109\/ICCV.2009.5459183"},{"issue":"2","key":"733_CR47","doi-asserted-by":"crossref","first-page":"137","DOI":"10.1023\/B:VISI.0000013087.49260.fb","volume":"57","author":"P Viola","year":"2004","unstructured":"Viola, P., & Jones, M. (2004). Robust real-time object detection. 
International Journal of Computer Vision, 57(2), 137\u2013154.","journal-title":"International Journal of Computer Vision"},{"key":"733_CR48","doi-asserted-by":"crossref","unstructured":"Wang, X., Lin, L., Huang, L., & Yan, S. (2013). Incorporating structural alternatives and sharing into hierarchy for multiclass object recognition and detection. In Proceedings of Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2013.428"},{"key":"733_CR49","doi-asserted-by":"crossref","DOI":"10.1007\/978-0-387-21736-9","volume-title":"All of statistics","author":"L Wasserman","year":"2004","unstructured":"Wasserman, L. (2004). All of statistics. Berlin: Springer."},{"key":"733_CR50","doi-asserted-by":"crossref","unstructured":"Xia, W., Song, Z., Feng, J., Cheong, L. F., & Yan, S. (2012). Segmentation over detection by coupled global and local sparse representations. In Proceedings of European Conference on Computer Vision.","DOI":"10.1007\/978-3-642-33715-4_48"},{"key":"733_CR51","unstructured":"Yang, J., Yu, K., Gong, Y., & Huang, T. (2009). Linear spatial pyramid matching using sparse coding for image classification. In Proceedings of Conference on Computer Vision and Pattern Recognition."},{"key":"733_CR52","unstructured":"Zeiler, M. D., & Fergus, R. (2013). Visualizing and understanding convolutional networks. CoRR abs\/1311.2901."},{"key":"733_CR53","unstructured":"Zhu, L., Chen, Y., Yuille, A., & Freeman, W. (2010). Latent hierarchical structural learning for object detection. In Proceedings of Conference on Computer Vision and Pattern Recognition."},{"issue":"11","key":"733_CR54","doi-asserted-by":"crossref","first-page":"2081","DOI":"10.1109\/TPAMI.2012.204","volume":"34","author":"A Zisserman","year":"2012","unstructured":"Zisserman, A., Winn, J., Fitzgibbon, A., Van Gool, L., Sivic, J., Williams, C., et al. (2012). In memoriam: Mark Everingham. 
Transactions on Pattern Analysis and Machine Intelligence, 34(11), 2081\u20132082.","journal-title":"Transactions on Pattern Analysis and Machine Intelligence"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-014-0733-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-014-0733-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-014-0733-5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,11]],"date-time":"2019-08-11T21:37:22Z","timestamp":1565559442000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-014-0733-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,6,25]]},"references-count":54,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2015,1]]}},"alternative-id":["733"],"URL":"https:\/\/doi.org\/10.1007\/s11263-014-0733-5","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,6,25]]}}}