{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,6]],"date-time":"2025-11-06T06:10:35Z","timestamp":1762409435217,"version":"3.37.3"},"reference-count":55,"publisher":"Springer Science and Business Media LLC","issue":"11-12","license":[{"start":{"date-parts":[[2018,7,17]],"date-time":"2018-07-17T00:00:00Z","timestamp":1531785600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1007\/s11263-018-1109-z","type":"journal-article","created":{"date-parts":[[2018,7,17]],"date-time":"2018-07-17T07:54:28Z","timestamp":1531814068000},"page":"1659-1679","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":33,"title":["End-to-End Learning of Latent Deformable Part-Based Representations for Object Detection"],"prefix":"10.1007","volume":"127","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4775-9239","authenticated-orcid":false,"given":"Taylor","family":"Mordan","sequence":"first","affiliation":[]},{"given":"Nicolas","family":"Thome","sequence":"additional","affiliation":[]},{"given":"Gilles","family":"Henaff","sequence":"additional","affiliation":[]},{"given":"Matthieu","family":"Cord","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,7,17]]},"reference":[{"key":"1109_CR1","doi-asserted-by":"crossref","unstructured":"Azizpour, H., & Laptev, I.(2012). Object detection using strongly-supervised deformable part models. In Proceedings of the IEEE European conference on computer vision (ECCV) (pp. 836\u2013849).","DOI":"10.1007\/978-3-642-33718-5_60"},{"key":"1109_CR2","doi-asserted-by":"crossref","unstructured":"Bell, S., Zitnick, L., Bala, K., & Girshick, R.(2016). Inside-outside net: Detecting objects in context with skip pooling and recurrent neural networks. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.314"},{"key":"1109_CR3","doi-asserted-by":"crossref","unstructured":"Ben-Younes, H., Cad\u00e8ne, R., Thome, N., & Cord M. (2017). MUTAN: Multimodal tucker fusion for visual question answering. In Proceedings of the IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.285"},{"key":"1109_CR4","doi-asserted-by":"crossref","unstructured":"Chandra, S., Usunier, N., Kokkinos, I. (2017). Dense and low-rank gaussian CRFs using deep embeddings. In Proceedings of the IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.546"},{"key":"1109_CR5","unstructured":"Chen, L. C., Papandreou, G., Kokkinos, I., Murphy, K., & Yuille, A. (2015). Semantic image segmentation with deep convolutional nets and fully connected CRFs. In Proceedings of the international conference on learning representations (ICLR)."},{"key":"1109_CR6","doi-asserted-by":"crossref","unstructured":"Dai, J., He, K., Li, Y., Ren, S., & Sun, J. (2016a). Instance-sensitive fully convolutional networks. In Proceedings of the IEEE European conference on computer vision (ECCV) (pp. 534\u2013549).","DOI":"10.1007\/978-3-319-46466-4_32"},{"key":"1109_CR7","unstructured":"Dai, J., Li, Y., He, K., & Sun, J. (2016b). R-FCN: Object detection via region-based fully convolutional networks. In Advances in neural information processing systems (NIPS)."},{"key":"1109_CR8","doi-asserted-by":"crossref","unstructured":"Dai, J., Qi, H., Xiong, Y., Li, Y., Zhang, G., Hu, H., & Wei, Y. (2017). Deformable convolutional networks. In Proceedings of the IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.89"},{"key":"1109_CR9","doi-asserted-by":"crossref","unstructured":"Durand, T., Mordan, T., Thome, N., & Cord, M. (2017). WILDCAT: Weakly supervised learning of deep convnets for image classification, pointwise localization and segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.631"},{"issue":"1","key":"1109_CR10","doi-asserted-by":"crossref","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham, M., Eslami, A., Van Gool, L., Williams, C., Winn, J., & Zisserman, A. (2015). The PASCAL visual object classes challenge: A retrospective. International Journal of Computer Vision (IJCV), 111(1), 98\u2013136.","journal-title":"International Journal of Computer Vision (IJCV)"},{"issue":"9","key":"1109_CR11","doi-asserted-by":"crossref","first-page":"1627","DOI":"10.1109\/TPAMI.2009.167","volume":"32","author":"P Felzenszwalb","year":"2010","unstructured":"Felzenszwalb, P., Girshick, R., McAllester, D., & Ramanan, D. (2010). Object detection with discriminatively trained part-based models. IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 32(9), 1627\u20131645.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)"},{"key":"1109_CR12","doi-asserted-by":"crossref","unstructured":"Fidler, S., Mottaghi, R., Yuille, A., & Urtasun, R. (2013). Bottom-up segmentation for top-down detection. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR) (pp. 3294\u20133301).","DOI":"10.1109\/CVPR.2013.423"},{"key":"1109_CR13","doi-asserted-by":"crossref","unstructured":"Gidaris, S., & Komodakis, N.(2015). Object detection via a multi-region and semantic segmentation-aware CNN model. In Proceedings of the IEEE international conference on computer vision (ICCV) (pp. 1134\u20131142).","DOI":"10.1109\/ICCV.2015.135"},{"key":"1109_CR14","doi-asserted-by":"crossref","unstructured":"Gidaris, S., & Komodakis, N. (2016a). Attend refine repeat: Active box proposal generation via in-out localization. In Proceedings of the British machine vision conference (BMVC).","DOI":"10.5244\/C.30.90"},{"key":"1109_CR15","doi-asserted-by":"crossref","unstructured":"Gidaris, S., & Komodakis, N.(2016b). LocNet: Improving localization accuracy for object detection. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.92"},{"key":"1109_CR16","doi-asserted-by":"crossref","unstructured":"Girshick, R. (2015). Fast R-CNN. In Proceedings of the IEEE international conference on computer vision (ICCV) (pp. 1440\u20131448).","DOI":"10.1109\/ICCV.2015.169"},{"key":"1109_CR17","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., & Malik, J. (2014). Rich feature hierarchies for accurate object detection and semantic segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR) (pp. 580\u2013587).","DOI":"10.1109\/CVPR.2014.81"},{"key":"1109_CR18","doi-asserted-by":"crossref","unstructured":"Girshick, R., Iandola, F., Darrell, T., & Malik, J. (2015). Deformable part models are convolutional neural networks. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR) (pp. 437\u2013446).","DOI":"10.1109\/CVPR.2015.7298641"},{"issue":"9","key":"1109_CR19","doi-asserted-by":"crossref","first-page":"1904","DOI":"10.1109\/TPAMI.2015.2389824","volume":"37","author":"K He","year":"2015","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2015). Spatial pyramid pooling in deep convolutional networks for visual recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 37(9), 1904\u20131916.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)"},{"key":"1109_CR20","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.90"},{"key":"1109_CR21","doi-asserted-by":"crossref","unstructured":"Kong, T., Yao, A., Chen, Y., & Sun, F. (2016). HyperNet: Towards accurate region proposal generation and joint object detection. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.98"},{"key":"1109_CR22","unstructured":"Kr\u00e4henb\u00fchl, P., & Koltun, V. (2011) Efficient inference in fully connected CRFs with Gaussian ddge potentials. In Advances in neural information processing systems (NIPS) (pp. 109\u2013117)."},{"key":"1109_CR23","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G. (2012). ImageNet classification with deep convolutional neural networks. In Advances in neural information processing systems (NIPS) (pp. 1097\u20131105)."},{"key":"1109_CR24","unstructured":"Lafferty, J., McCallum, A., & Pereira, F. (2001). Conditional random fields: Probabilistic models for segmenting and labeling sequence data. In Proceedings of the international conference on machine learning (ICML)."},{"issue":"4","key":"1109_CR25","doi-asserted-by":"crossref","first-page":"541","DOI":"10.1162\/neco.1989.1.4.541","volume":"1","author":"Y LeCun","year":"1989","unstructured":"LeCun, Y., Boser, B., Denker, J., Henderson, D., Howard, R., Hubbard, W., et al. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1(4), 541\u2013551.","journal-title":"Neural Computation"},{"key":"1109_CR26","doi-asserted-by":"crossref","unstructured":"Li, Y., Qi, H., Dai, J., Ji, X., & Wei, Y. (2017). Fully convolutional instance-aware semantic segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.472"},{"key":"1109_CR27","doi-asserted-by":"crossref","unstructured":"Lin, D., Shen, X., Lu, C., & Jia, J. (2015). Deep LAC: Deep localization, alignment and classification for fine-grained recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR) (pp. 1666\u20131674).","DOI":"10.1109\/CVPR.2015.7298775"},{"key":"1109_CR28","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., & Zitnick, L. (2014). Microsoft COCO: Common objects in context. In Proceedings of the IEEE European conference on computer vision (ECCV) (pp. 740\u2013755).","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1109_CR29","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., & Belongie, S. (2017a). Feature pyramid networks for object detection. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.106"},{"key":"1109_CR30","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., Goyal, P., Girshick, R., He, K., & Doll\u00e1r, P. (2017b). Focal loss for dense object detection. In Proceedings of the IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.324"},{"key":"1109_CR31","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., & Reed, S.(2016). SSD: Single shot multibox detector. In Proceedings of the IEEE European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"1109_CR32","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., & Darrell, T. (2015). Fully convolutional networks for semantic segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR) (pp. 3431\u20133440).","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"1109_CR33","doi-asserted-by":"crossref","unstructured":"Mordan, T., Thome, N., Cord, M., & Henaff, G. (2017). Deformable part-based fully convolutional network for object detection. In Proceedings of the British machine vision conference (BMVC).","DOI":"10.5244\/C.31.88"},{"key":"1109_CR34","doi-asserted-by":"crossref","unstructured":"Ott, P., & Everingham, M. (2011). Shared parts for deformable part-based models. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR) (pp. 1513\u20131520).","DOI":"10.1109\/CVPR.2011.5995357"},{"key":"1109_CR35","doi-asserted-by":"crossref","unstructured":"Pinheiro, P., Lin, T. Y., Collobert, R., & Doll\u00e1r, P. (2016) Learning to refine object segments. In Proceedings of the IEEE European conference on computer vision (ECCV) (pp. 75\u201391).","DOI":"10.1007\/978-3-319-46448-0_5"},{"key":"1109_CR36","doi-asserted-by":"crossref","unstructured":"Redmon, J., & Farhadi, A. (2017). YOLO9000: Better, faster, stronger. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.690"},{"key":"1109_CR37","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., & Farhadi, A. (2016). You only look once: Unified, real-time object detection. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.91"},{"key":"1109_CR38","unstructured":"Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster R-CNN: Towards real-time object detection with region proposal networks. In Advances in neural information processing systems (NIPS) (pp. 91\u201399)."},{"issue":"3","key":"1109_CR39","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., Deng, J., Su, H., Krause, J., Satheesh, S., Ma, S., et al. (2015). ImageNet large scale visual recognition challenge. International Journal of Computer Vision (IJCV), 115(3), 211\u2013252.","journal-title":"International Journal of Computer Vision (IJCV)"},{"key":"1109_CR40","unstructured":"Savalle, P. A., Tsogkas, S., Papandreou, G., & Kokkinos, I. (2014). Deformable part models with CNN features. In Proceedings of the IEEE European conference on computer vision (ECCV), parts and attributes workshop."},{"key":"1109_CR41","doi-asserted-by":"crossref","unstructured":"Shrivastava, A., Gupta, A., & Girshick, R. (2016). Training region-based object detectors with online hard example mining. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.89"},{"key":"1109_CR42","doi-asserted-by":"crossref","unstructured":"Sicre, R., Avrithis, Y., Kijak, E., & Jurie, F. (2017). Unsupervised part learning for visual recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.332"},{"key":"1109_CR43","doi-asserted-by":"crossref","unstructured":"Simon, M., & Rodner, E. (2015). Neural activation constellations: Unsupervised part model discovery with convolutional networks. In Proceedings of the IEEE international conference on computer vision (ICCV) (pp. 1143\u20131151).","DOI":"10.1109\/ICCV.2015.136"},{"key":"1109_CR44","unstructured":"Simonyan, K., & Zisserman, A. (2015) Very deep convolutional networks for large-scale image recognition. In Proceedings of the international conference on learning representations (ICLR)."},{"issue":"3","key":"1109_CR45","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF02289464","volume":"31","author":"L Tucker","year":"1966","unstructured":"Tucker, L. (1966). Some mathematical notes on three-mode factor analysis. Psychometrika, 31(3), 279\u2013311.","journal-title":"Psychometrika"},{"key":"1109_CR46","unstructured":"Wan, L., Eigen, D., & Fergus, R. (2015). End-to-end integration of a convolution network, deformable parts model and non-maximum suppression. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR) (pp. 851\u2013859)."},{"key":"1109_CR47","doi-asserted-by":"crossref","unstructured":"Wang, P., Shen, X., Lin, Z., Cohen, S., Price, B., & Yuille, A. L. (2015). Joint object and part segmentation using deep learned potentials. In Proceedings of the IEEE international conference on computer vision (ICCV) (pp. 1573\u20131581).","DOI":"10.1109\/ICCV.2015.184"},{"key":"1109_CR48","doi-asserted-by":"crossref","unstructured":"Xie, S., Girshick, R., Doll\u00e1r, P., Tu, Z., & He, K. (2017). Aggregated residual transformations for deep neural networks. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.634"},{"key":"1109_CR49","unstructured":"Yu, F., & Koltun, V. (2016). Multi-scale context aggregation by dilated convolutions. In Proceedings of the international conference on learning representations (ICLR)."},{"key":"1109_CR50","doi-asserted-by":"crossref","unstructured":"Zagoruyko, S., & Komodakis, N. (2016). Wide residual networks. In Proceedings of the British machine vision conference (BMVC).","DOI":"10.5244\/C.30.87"},{"key":"1109_CR51","doi-asserted-by":"crossref","unstructured":"Zagoruyko, S., Lerer, A., Lin, T. Y., Pinheiro, P., Gross, S., Chintala, S., & Dollar, P. (2016). A multipath network for object detection. In Proceedings of the British machine vision conference (BMVC).","DOI":"10.5244\/C.30.15"},{"key":"1109_CR52","doi-asserted-by":"crossref","unstructured":"Zhang, H., Xu, T., Elhoseiny, M., Huang, X., Zhang, S., Elgammal, A., & Metaxas, D. (2016). SPDA-CNN: Unifying semantic part detection and abstraction for fine-grained recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR) (pp. 1143\u20131152).","DOI":"10.1109\/CVPR.2016.129"},{"key":"1109_CR53","doi-asserted-by":"crossref","unstructured":"Zhang, N., Donahue, J., Girshick, R., & Darrell, T. (2014). Part-based R-CNNs for fine-grained category detection. In Proceedings of the IEEE European conference on computer vision (ECCV) (pp. 834\u2013849).","DOI":"10.1007\/978-3-319-10590-1_54"},{"key":"1109_CR54","doi-asserted-by":"crossref","unstructured":"Zheng, S., Jayasumana, S., Romera-Paredes, B., Vineet, V., Su, Z., Du, D., Huang, C., & Torr, P. (2015). Conditional random fields as recurrent neural networks. In Proceedings of the IEEE international conference on computer vision (ICCV) (pp. 1529\u20131537).","DOI":"10.1109\/ICCV.2015.179"},{"key":"1109_CR55","unstructured":"Zhu, L., Chen, Y., Yuille, A., & Freeman, W. (2010). Latent hierarchical structural learning for object detection. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR) (pp. 1062\u20131069)."}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-018-1109-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-018-1109-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-018-1109-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,12,16]],"date-time":"2019-12-16T12:01:02Z","timestamp":1576497662000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-018-1109-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,7,17]]},"references-count":55,"journal-issue":{"issue":"11-12","published-print":{"date-parts":[[2019,12]]}},"alternative-id":["1109"],"URL":"https:\/\/doi.org\/10.1007\/s11263-018-1109-z","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"type":"print","value":"0920-5691"},{"type":"electronic","value":"1573-1405"}],"subject":[],"published":{"date-parts":[[2018,7,17]]},"assertion":[{"value":"20 February 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 July 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 July 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}