{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,28]],"date-time":"2025-10-28T14:54:20Z","timestamp":1761663260317,"version":"3.40.3"},"publisher-location":"Cham","reference-count":51,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319056951"},{"type":"electronic","value":"9783319056968"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-05696-8_2","type":"book-chapter","created":{"date-parts":[[2014,5,9]],"date-time":"2014-05-09T14:19:39Z","timestamp":1399645179000},"page":"29-52","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Bag-of-Words Image Representation: Key Ideas and Further Insight"],"prefix":"10.1007","author":[{"given":"Marc T.","family":"Law","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nicolas","family":"Thome","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Matthieu","family":"Cord","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2014,3,26]]},"reference":[{"key":"2_CR1","doi-asserted-by":"crossref","unstructured":"Avila S, Thome N, Cord M, Valle E, de Araujo A (2011) Bossa: extended bow formalism for image classification. In: Proceedings of the IEEE international conference on image processing (ICIP)","DOI":"10.1109\/ICIP.2011.6116268"},{"key":"2_CR2","doi-asserted-by":"crossref","unstructured":"Bach FR, Lanckriet GR, Jordan MI (2004) Multiple kernel learning, conic duality, and the SMO algorithm. In: Proceedings of the twenty-first international conference on machine learning (ICML)","DOI":"10.1145\/1015330.1015424"},{"issue":"3","key":"2_CR3","doi-asserted-by":"publisher","first-page":"346","DOI":"10.1016\/j.cviu.2007.09.014","volume":"110","author":"H Bay","year":"2008","unstructured":"Bay H, Ess A, Tuytelaars T, van Gool L (2008) SURF: speeded Up robust features. Comput Vis Image Underst (CVIU) 110(3):346\u2013359","journal-title":"Comput Vis Image Underst (CVIU)"},{"key":"2_CR4","doi-asserted-by":"crossref","unstructured":"Benois-Pineau J, Bugeau A, Karaman S, M\u00e9gret R (2012) Spatial and multi-resolution context in visual indexing. In: Visual Indexing and Retrieval, pp 41\u201363","DOI":"10.1007\/978-1-4614-3588-4_4"},{"key":"2_CR5","doi-asserted-by":"crossref","unstructured":"Boureau Y-L, Bach, F, LeCun Y, Ponce J (2010) Learning mid-level features for recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2010.5539963"},{"key":"2_CR6","doi-asserted-by":"crossref","unstructured":"Boureau Y-L, Le Roux N, Bach F, Ponce J, LeCun Y (2011) Ask the locals: multi-way local pooling for image recognition. In: Proceedings of the IEEE international conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2011.6126555"},{"key":"2_CR7","unstructured":"Boureau Y-L, Ponce J, LeCun Y (2010) A theoretical analysis of feature pooling in vision algorithms. In: Proceedings of the international conference on machine learning (ICML)"},{"key":"2_CR8","doi-asserted-by":"crossref","unstructured":"Chatfield K, Lempitsky V, Vedaldi A, Zisserman A (2011) The devil is in the details: an evaluation of recent feature encoding methods. In: Proceedings of the British machine vision conference (BMVC)","DOI":"10.5244\/C.25.76"},{"key":"2_CR9","unstructured":"Coates A, Ng A (2011) The importance of encoding versus training with sparse coding and vector quantization. In: Proceedings of the 28th international conference on machine learning (ICML)"},{"key":"2_CR10","doi-asserted-by":"crossref","unstructured":"Cord M, Cunningham P (2008) Machine learning techniques for multimedia: case studies on organization and retrieval. Machine learning techniques for multimedia, cognitive technologies. Springer, Heidelberg","DOI":"10.1007\/978-3-540-75171-7"},{"issue":"3","key":"2_CR11","first-page":"273","volume":"20","author":"C Cortes","year":"1995","unstructured":"Cortes C, Vapnik V (1995) Support-vector networks. Mach Learn 20(3):273\u2013297","journal-title":"Mach Learn"},{"key":"2_CR12","unstructured":"Dalal N, Triggs B (2005) Histograms of oriented gradients for human detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)"},{"key":"2_CR13","doi-asserted-by":"crossref","unstructured":"Duchenne O, Joulin A, Ponce J (2011) A graph-matching kernel for object categorization. In: Proceedings of the IEEE international conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2011.6126445"},{"key":"2_CR14","unstructured":"Everingham M, Zisserman A, Williams C, Van Gool L (2007) The PASCAL visual obiect classes challenge 2007 (VOC2007) results. Technical Report, Pascal Challenge"},{"key":"2_CR15","first-page":"1871","volume":"9","author":"RE Fan","year":"2008","unstructured":"Fan RE, Chang KW, Hsieh CJ, Wang XR, Lin CJ (2008) LIBLINEAR: a library for large linear classification. J Mach Learn Res (JMLR) 9:1871\u20131874","journal-title":"J Mach Learn Res (JMLR)"},{"key":"2_CR16","doi-asserted-by":"crossref","unstructured":"de Avila Fontes SE, Thome N, Cord M, Valle E, de Albuquerque Arajo A (2013) Pooling in image representation: The visual codeword point of view. Comp Vis Image Underst 117(5):453\u2013465","DOI":"10.1016\/j.cviu.2012.09.007"},{"key":"2_CR17","unstructured":"Fei-fei L (2005) A bayesian hierarchical model for learning natural scene categories. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)"},{"key":"2_CR18","unstructured":"Fei-Fei L, Fergus R, Perona P (2004) Learning generative visual models from few training examples: an incremental bayesian approach tested on 101 object categories. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR) workshop on GMBV"},{"key":"2_CR19","unstructured":"Feng J, Ni B, Tian Q, Yan S (2011) Geometric $$\\ell _p$$-norm feature pooling for image classification. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)"},{"key":"2_CR20","doi-asserted-by":"crossref","unstructured":"Gehler P, Nowozin S (2009) On feature combination for multiclass object classification. In: Proceedings of the IEEE international conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2009.5459169"},{"issue":"7","key":"2_CR21","doi-asserted-by":"crossref","first-page":"1271","DOI":"10.1109\/TPAMI.2009.132","volume":"32","author":"J Gemert","year":"2010","unstructured":"van Gemert J, Veenman C, Smeulders A, Geusebroek JM (2010) Visual word ambiguity. IEEE Trans Pattern Anal Mach Intell (TPAMI) 32(7):1271\u20131283","journal-title":"IEEE Trans Pattern Anal Mach Intell (TPAMI)"},{"key":"2_CR22","doi-asserted-by":"crossref","unstructured":"Goh H, Thome N, Cord M, Lim J-H (2012) Unsupervised and supervised visual codes with restricted Boltzmann machines. In: Proceedings of the European conference on computer vision (ECCV)","DOI":"10.1007\/978-3-642-33715-4_22"},{"key":"2_CR23","doi-asserted-by":"crossref","unstructured":"Gonz\u00e1lez-D\u00edaz I, Buso V, Benois-Pineau J, Bourmaud G, Megret R (2013) Modeling instrumental activities of daily livinf in egocentric vision as sequences of active objects and context for Alzheimer disease research. In: ACM multimedia workshop on multimedia information indexing and retrieval for healthcare","DOI":"10.1145\/2505323.2505328"},{"key":"2_CR24","doi-asserted-by":"crossref","unstructured":"Grauman K, Darrell T (2005) The pyramid match kernel: discriminative classification with sets of image features. In: Proceedings of the IEEE international conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2005.239"},{"key":"2_CR25","doi-asserted-by":"crossref","unstructured":"Harris S, Stephens M (1988) A combined corner and edge detector. In: Proceedings of the 4th Alvey vision conference, pp 147\u2013151","DOI":"10.5244\/C.2.23"},{"key":"2_CR26","doi-asserted-by":"crossref","unstructured":"J\u00e9gou H, Douze M, Schmid C, P\u00e9rez P (2010) Aggregating local descriptors into a compact image representation. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2010.5540039"},{"key":"2_CR27","doi-asserted-by":"crossref","unstructured":"Karaman S, Benois-Pineau J, Mgret R, Bugeau A (2012) Multi-layer local graph words for object recognition. In: Proceedings of the international conference on multimedia modeling","DOI":"10.1007\/978-3-642-27355-1_6"},{"key":"2_CR28","unstructured":"Kavukcuoglu K, Sermanet P, Boureau Y-L, Gregor K, Mathieu M, LeCun Y (2010) Learning convolutional feature hierachies for visual recognition. In: Proceedings of advances in neural information processing systems (NIPS), pp 1090\u20131098"},{"key":"2_CR29","unstructured":"Krizhevsky A, Sutskever I, Hinton G (2012) Imagenet classification with deep convolutional neural networks. In: Proceedings of advances in neural information processing systems (NIPS), pp. 1106\u20131114"},{"key":"2_CR30","unstructured":"Lazebnik S, Schmid C, Ponce J (2006) Beyond bags of features: spatial pyramid matching for recognizing natural scene categories. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)"},{"key":"2_CR31","unstructured":"Liu L, Wang L, Liu X (2011) In defense of soft-assignment coding. In: Proceedings of the IEEE international conference on computer vision (ICCV)"},{"key":"2_CR32","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"D Lowe","year":"2004","unstructured":"Lowe D (2004) Distinctive image features from scale-invariant keypoints. Int J Comput Vis (IJCV) 60:91\u2013110","journal-title":"Int J Comput Vis (IJCV)"},{"issue":"1","key":"2_CR33","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1023\/B:VISI.0000027790.02288.f2","volume":"60","author":"K Mikolajczyk","year":"2004","unstructured":"Mikolajczyk K, Schmid C (2004) Scale and affine invariant interest point detectors. Int J Comput Vis (IJCV) 60(1):63\u201386","journal-title":"Int J Comput Vis (IJCV)"},{"key":"2_CR34","doi-asserted-by":"crossref","unstructured":"Mironica I, Uijlings J, Rostamzadeh N, Ionescu B, Sebe N (2013) Time matters! capturing variation in time in video using fisher kernels. In: Proceedings of the 21st ACM international conference on multimedia","DOI":"10.1145\/2502081.2502183"},{"key":"2_CR35","doi-asserted-by":"crossref","unstructured":"Perronnin F, Dance CR (2007) Fisher kernels on visual vocabularies for image categorization. In: Proceedings of the IEEE Conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2007.383266"},{"key":"2_CR36","doi-asserted-by":"crossref","unstructured":"Perronnin F, S\u00e1nchez J, Mensink T (2010) Improving the fisher kernel for large-scale image classification. In: Proceedings of the European conference on computer vision (ECCV)","DOI":"10.1007\/978-3-642-15561-1_11"},{"key":"2_CR37","doi-asserted-by":"publisher","first-page":"411","DOI":"10.1109\/TPAMI.2007.56","volume":"29","author":"T Serre","year":"2007","unstructured":"Serre T, Wolf L, Bileschi S, Riesenhuber M, Poggio T (2007) Robust object recognition with cortex-like mechanisms. IEEE Trans Pattern Anal Mach Intell (TPAMI) 29:411\u2013426","journal-title":"IEEE Trans Pattern Anal Mach Intell (TPAMI)"},{"key":"2_CR38","doi-asserted-by":"crossref","unstructured":"Sharma G, Jurie F, Schmid C (2012) Discriminative spatial saliency for image classification. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2012.6248093"},{"key":"2_CR39","doi-asserted-by":"crossref","unstructured":"Sivic J, Zisserman A (2003) Video google: a text retrieval approach to object matching in videos. In: Proceedings of the IEEE international conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2003.1238663"},{"key":"2_CR40","doi-asserted-by":"crossref","unstructured":"Smith JR, Chang S-F (1997) VisualSEEk: a fully automated content-based image query system. In: Proceedings of the fourth ACM international conference on Multimedia, ACM, pp 87\u201398","DOI":"10.1145\/244130.244151"},{"issue":"2","key":"2_CR41","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1145\/1142020.1142021","volume":"2","author":"C Snoek","year":"2006","unstructured":"Snoek C, Worring M, Hauptmann A (2006) Learning rich semantics from news video archives by style analysis. ACM Transa Multimedia Comput Commun Appl (TOMCCAP) 2(2):91\u2013108","journal-title":"ACM Transa Multimedia Comput Commun Appl (TOMCCAP)"},{"issue":"2","key":"2_CR42","doi-asserted-by":"publisher","first-page":"764","DOI":"10.1109\/TIP.2012.2222900","volume":"22","author":"C Th\u00e9riault","year":"2013","unstructured":"Th\u00e9riault C, Thome N, Cord M (2013) Extended coding and pooling in the HMAX model. IEEE Trans Image Process 22(2):764\u2013777","journal-title":"IEEE Trans Image Process"},{"issue":"9","key":"2_CR43","doi-asserted-by":"publisher","first-page":"1582","DOI":"10.1109\/TPAMI.2009.154","volume":"32","author":"KEA van de Sande","year":"2010","unstructured":"van de Sande KEA, Gevers T, Snoek CGM (2010) Evaluating color descriptors for object and scene recognition. IEEE Trans Pattern Anal Mach Intell (TPAMI) 32(9):1582\u20131596","journal-title":"IEEE Trans Pattern Anal Mach Intell (TPAMI)"},{"key":"2_CR44","unstructured":"Vedaldi A, Fulkerson B (2008) VLFeat: an open and portable library of computer vision algorithms. http:\/\/www.vlfeat.org\/"},{"key":"2_CR45","doi-asserted-by":"crossref","unstructured":"Vedaldi A, Gulshan V, Varma M, Zisserman A (2009) Multiple kernels for object detection. In: Proceedings of the IEEE international conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2009.5459183"},{"key":"2_CR46","doi-asserted-by":"crossref","first-page":"480","DOI":"10.1109\/TPAMI.2011.153","volume":"34","author":"A Vedaldi","year":"2011","unstructured":"Vedaldi A, Zisserman A (2011) Efficient additive kernels via explicit feature maps. IEEE Trans Pattern Anal Mach Intell (TPAMI) 34:480\u2013492","journal-title":"IEEE Trans Pattern Anal Mach Intell (TPAMI)"},{"key":"2_CR47","doi-asserted-by":"crossref","unstructured":"Vig E, Dorr, M, Cox DD (2012) Space-variant descriptor sampling for action recognition based on saliency and eye movements. In: Proceedings of the European conference on computer vision (ECCV)","DOI":"10.1007\/978-3-642-33786-4_7"},{"key":"2_CR48","doi-asserted-by":"crossref","unstructured":"Wang J, Yang J, Yu K, Lv F, Huang T, Gong Y (2010) Locality-constrained linear coding for image classification. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2010.5540018"},{"key":"2_CR49","unstructured":"Yang J, Yu K, Gong Y, Huang T (2009) Linear spatial pyramid matching using sparse coding for image classification. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)"},{"key":"2_CR50","unstructured":"Zhang H, Berg AC, Maire M, Malik J (2006) SVM-KNN: discriminative nearest neighbor classification for visual category recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)"},{"key":"2_CR51","doi-asserted-by":"crossref","unstructured":"Zhou X, Yu K, Zhang T, Huang TS (2010) Image classification using super-vector coding of local image descriptors. In: Proceedings of the european conference on computer vision (ECCV)","DOI":"10.1007\/978-3-642-15555-0_11"}],"container-title":["Advances in Computer Vision and Pattern Recognition","Fusion in Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-05696-8_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,7]],"date-time":"2023-02-07T22:17:33Z","timestamp":1675808253000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-05696-8_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319056951","9783319056968"],"references-count":51,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-05696-8_2","relation":{},"ISSN":["2191-6586","2191-6594"],"issn-type":[{"type":"print","value":"2191-6586"},{"type":"electronic","value":"2191-6594"}],"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"26 March 2014","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}