{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,28]],"date-time":"2025-10-28T05:29:07Z","timestamp":1761629347717,"version":"3.41.0"},"reference-count":54,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2015,3,22]],"date-time":"2015-03-22T00:00:00Z","timestamp":1426982400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Multimed Info Retr"],"published-print":{"date-parts":[[2015,6]]},"DOI":"10.1007\/s13735-015-0077-0","type":"journal-article","created":{"date-parts":[[2015,3,21]],"date-time":"2015-03-21T04:48:00Z","timestamp":1426913280000},"page":"75-93","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":31,"title":["On-the-fly learning for visual search of large-scale image and video datasets"],"prefix":"10.1007","volume":"4","author":[{"given":"Ken","family":"Chatfield","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Relja","family":"Arandjelovi\u0107","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Omkar","family":"Parkhi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andrew","family":"Zisserman","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,3,22]]},"reference":[{"key":"77_CR1","doi-asserted-by":"crossref","unstructured":"Arandjelovi\u0107 R, Zisserman A (2012) Multiple queries for large scale specific object retrieval. In: Proceedings of BMVC","DOI":"10.5244\/C.26.92"},{"key":"77_CR2","doi-asserted-by":"crossref","unstructured":"Arandjelovi\u0107 R, Zisserman A (2012) Three things everyone should know to improve object retrieval. In: Proceedings of CVPR","DOI":"10.1109\/CVPR.2012.6248018"},{"key":"77_CR3","doi-asserted-by":"crossref","unstructured":"Bauml M, Tapaswi M, Stiefelhagen R (2014) A time pooled track kernel for person identification. In: Proceedings of the 11th international conference on advanced video and signal-Based surveillance (AVSS). IEEE","DOI":"10.1109\/AVSS.2014.6918636"},{"key":"77_CR4","doi-asserted-by":"crossref","unstructured":"Berg TL, Forsyth DA (2006) Animals on the web. In: Proceedings of CVPR","DOI":"10.1109\/CVPR.2006.57"},{"key":"77_CR5","unstructured":"Bergamo A, Torresani L, Fitzgibbon A (2011) PiCoDes: learning a compact code for novel-category recognition. In: NIPS, pp 2088\u20132096"},{"key":"77_CR6","doi-asserted-by":"crossref","unstructured":"Chatfield K, Lempitsky V, Vedaldi A, Zisserman A (2011) The devil is in the details: an evaluation of recent feature encoding methods. In: Proceedings of BMVC","DOI":"10.5244\/C.25.76"},{"key":"77_CR7","doi-asserted-by":"crossref","unstructured":"Chatfield K, Simonyan K, Vedaldi A, Zisserman A (2014) Return of the devil in the details: Delving deep into convolutional nets. In: Proceedings of BMVC","DOI":"10.5244\/C.28.6"},{"key":"77_CR8","doi-asserted-by":"crossref","unstructured":"Chatfield K, Simonyan K, Zisserman A (2014) Efficient on-the-fly category retrieval using convnets and GPUs. In: Proceedings of ACCV, lecture notes in computer science. Springer","DOI":"10.1007\/978-3-319-16865-4_9"},{"key":"77_CR9","doi-asserted-by":"crossref","unstructured":"Chatfield K, Zisserman A (2012) Visor: Towards on-the-fly large-scale object category retrieval. In: Proceedings of ACCV, lecture notes in computer science. Springer","DOI":"10.1007\/978-3-642-37444-9_34"},{"key":"77_CR10","doi-asserted-by":"crossref","unstructured":"Chum O, Mikulik A, Per\u010foch M, Matas J (2011) Total recall II: query expansion revisited. In: Proceedings of CVPR","DOI":"10.1109\/CVPR.2011.5995601"},{"key":"77_CR11","doi-asserted-by":"crossref","unstructured":"Chum O, Philbin J, Sivic J, Isard M, Zisserman A (2007) Total recall: automatic query expansion with a generative feature model for object retrieval. In: Proceedings of ICCV","DOI":"10.1109\/ICCV.2007.4408891"},{"key":"77_CR12","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li LJ, Li K, Fei-Fei L (2009) Imagenet: a large-scale hierarchical image database. In: Proceedings of CVPR","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"77_CR13","unstructured":"Donahue J, Jia Y, Vinyals O, Hoffman J, Zhang N, Tzeng E, Darrell T (2013) Decaf: a deep convolutional activation feature for generic visual recognition. CoRR. arXiv:1310.1531"},{"issue":"5","key":"77_CR14","doi-asserted-by":"crossref","first-page":"545","DOI":"10.1016\/j.imavis.2008.04.018","volume":"27","author":"M Everingham","year":"2009","unstructured":"Everingham M, Sivic J, Zisserman A (2009) Taking the bite out of automatic naming of characters in TV video. Image Vis Comput 27(5):545\u2013559","journal-title":"Image Vis Comput"},{"issue":"2","key":"77_CR15","doi-asserted-by":"crossref","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham M, Van Gool L, Williams CKI, Winn J, Zisserman A (2010) The PASCAL visual object classes (VOC) challenge. IJCV 88(2):303\u2013338","journal-title":"IJCV"},{"key":"77_CR16","doi-asserted-by":"crossref","unstructured":"Fergus R, Fei-Fei L, Perona P, Zisserman A (2005) Learning object categories from Google\u2019s image search. In: Proceedings of ICCV","DOI":"10.1109\/ICCV.2005.142"},{"issue":"8","key":"77_CR17","doi-asserted-by":"crossref","first-page":"1453","DOI":"10.1109\/JPROC.2010.2048990","volume":"98","author":"R Fergus","year":"2010","unstructured":"Fergus R, Fei-Fei L, Perona P, Zisserman A (2010) Learning object categories from internet image searches. Proc IEEE 98(8):1453\u20131466","journal-title":"Proc IEEE"},{"key":"77_CR18","doi-asserted-by":"crossref","unstructured":"Fernando B, Tuytelaars T (2013) Mining multiple queries for image retrieval: on-the-fly learning of an object-specific mid-level representation. In: Proceedings of ICCV","DOI":"10.1109\/ICCV.2013.316"},{"key":"77_CR19","doi-asserted-by":"crossref","unstructured":"Huiskes MJ, Lew MS (2008) The mir flickr retrieval evaluation. In: MIR \u201908: Proceedings of the 2008 ACM international conference on multimedia information retrieval","DOI":"10.1145\/1460096.1460104"},{"key":"77_CR20","doi-asserted-by":"crossref","unstructured":"Huiskes MJ, Thomee B, Lew MS (2010) New trends and ideas in visual concept detection: the mir flickr retrieval evaluation initiative. In: MIR \u201910: Proceedings of the 2010 ACM international conference on multimedia information retrieval, pp 527\u2013536","DOI":"10.1145\/1743384.1743475"},{"issue":"4","key":"77_CR21","doi-asserted-by":"crossref","first-page":"1031","DOI":"10.1109\/TMM.2012.2187180","volume":"14","author":"N Ikizler-Cinbis","year":"2012","unstructured":"Ikizler-Cinbis N, Sclaroff S (2012) Web-based classifiers for human action recognition. Multimed IEEE Trans 14(4):1031\u20131045","journal-title":"Multimed IEEE Trans"},{"key":"77_CR22","doi-asserted-by":"crossref","unstructured":"J\u00e9gou H, Chum O (2012) Negative evidences and co-occurrences in image retrieval: the benefit of PCA and whitening. In: Proceedings of ECCV","DOI":"10.1007\/978-3-642-33709-3_55"},{"key":"77_CR23","doi-asserted-by":"crossref","unstructured":"J\u00e9gou H, Douze M, Schmid C (2008) Hamming embedding and weak geometric consistency for large scale image search. In: Proceedings of ECCV, pp 304\u2013317","DOI":"10.1007\/978-3-540-88682-2_24"},{"key":"77_CR24","doi-asserted-by":"crossref","unstructured":"J\u00e9gou H, Douze M, Schmid C (2009) On the burstiness of visual elements. In: Proceedings of CVPR","DOI":"10.1109\/CVPR.2009.5206609"},{"issue":"3","key":"77_CR25","doi-asserted-by":"crossref","first-page":"316","DOI":"10.1007\/s11263-009-0285-2","volume":"87","author":"H J\u00e9gou","year":"2010","unstructured":"J\u00e9gou H, Douze M, Schmid C (2010) Improving bag-of-features for large scale image search. IJCV 87(3):316\u2013336","journal-title":"IJCV"},{"key":"77_CR26","doi-asserted-by":"crossref","unstructured":"J\u00e9gou H, Douze M, Schmid C (2011) Product quantization for nearest neighbor search. IEEE PAMI","DOI":"10.1109\/TPAMI.2010.57"},{"key":"77_CR27","doi-asserted-by":"crossref","unstructured":"Jia Y, Schelhamer E, Donahue J, Karayev S, Long J, Girshick R, Guadarrama S, Darrell T (2014) Caffe: convolutional architecture for fast feature embedding. arXiv:1408.5093","DOI":"10.1145\/2647868.2654889"},{"key":"77_CR28","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) ImageNet classification with deep convolutional neural networks. In: NIPS, pp 1106\u20131114"},{"key":"77_CR29","doi-asserted-by":"crossref","unstructured":"Kumar N, Seitz S (2014) Photo recall: using the internet to label your photos. In: The 23rd international conference on world wide web companion","DOI":"10.1145\/2567948.2577360"},{"key":"77_CR30","doi-asserted-by":"crossref","unstructured":"Kumar N, Seitz S (2014) Photo recall: Using the internet to label your photos. In: 2nd workshop on web-scale vision and social media (VSM) at CVPR 2014","DOI":"10.1109\/CVPRW.2014.120"},{"key":"77_CR31","doi-asserted-by":"crossref","unstructured":"Li J, Wang G, Fei-Fei L (2007) OPTIMOL: automatic object Picture collection via incremental model learning. In: Proceedings of CVPR","DOI":"10.1109\/CVPR.2007.383048"},{"issue":"4","key":"77_CR32","doi-asserted-by":"crossref","first-page":"933","DOI":"10.1109\/TMM.2013.2238523","volume":"15","author":"X Li","year":"2013","unstructured":"Li X, Snoek CGM, Worring M, Koelma D, Smeulders AWM (2013) Bootstrapping visual categorization with relevant negatives. IEEE Trans Multimed 15(4):933\u2013945","journal-title":"IEEE Trans Multimed"},{"key":"77_CR33","doi-asserted-by":"crossref","unstructured":"Lin WH, Jin R, Hauptmann A (2003) Web image retrieval re-ranking with relevance model. In: Proceedings of ICWI","DOI":"10.1109\/WI.2003.1241200"},{"key":"77_CR34","doi-asserted-by":"crossref","unstructured":"Liu Y, Xu D, Tsang IW, Luo J (2009) Using large-scale web data to facilitate textual query based retrieval of consumer photos. In: Proceedings of the 17th ACM international conference on multimedia, MM \u201909, pp 55\u201364","DOI":"10.1145\/1631272.1631283"},{"issue":"60","key":"77_CR35","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1023\/B:VISI.0000027790.02288.f2","volume":"1","author":"K Mikolajczyk","year":"2004","unstructured":"Mikolajczyk K, Schmid C (2004) Scale and affine invariant interest point detectors. IJCV 1(60):63\u201386","journal-title":"IJCV"},{"key":"77_CR36","unstructured":"Over P, Awad G, Michel M, Fiscus J, Sanders G, Kraaij W, Smeaton AF, Quenot G (2012) Trecvid 2012\u2014an overview of the goals, tasks, data, evaluation mechanisms and metrics. In: Proceedings of TRECVID 2012"},{"key":"77_CR37","unstructured":"Over P, Awad G, Michel M, Fiscus J, Sanders G, Kraaij W, Smeaton AF, Quenot G (2013) Trecvid 2013\u2014an overview of the goals, tasks, data, evaluation mechanisms and metrics. In: Proceedings of TRECVID 2013"},{"key":"77_CR38","doi-asserted-by":"crossref","unstructured":"Parkhi OM, Simonyan K, Vedaldi A, Zisserman A (2014) A compact and discriminative face track descriptor. In: Proceedings of CVPR. IEEE","DOI":"10.1109\/CVPR.2014.219"},{"key":"77_CR39","doi-asserted-by":"crossref","unstructured":"Parkhi OM, Vedaldi A, Zisserman A (2012) On-the-fly specific person retrieval. In: International workshop on image analysis for multimedia interactive services. IEEE","DOI":"10.1109\/WIAMIS.2012.6226775"},{"key":"77_CR40","doi-asserted-by":"crossref","unstructured":"Per\u010foch M, Chum O, Matas J (2009) Efficient representation of local geometry for large scale object retrieval. In: Proceedings of CVPR","DOI":"10.1109\/CVPR.2009.5206529"},{"key":"77_CR41","doi-asserted-by":"crossref","unstructured":"Perronnin F, S\u00e1nchez J, Mensink T (2010) Improving the Fisher kernel for large-scale image classification. In: Proceedings of ECCV","DOI":"10.1007\/978-3-642-15561-1_11"},{"key":"77_CR42","doi-asserted-by":"crossref","unstructured":"Philbin J, Chum O, Isard M, Sivic J, Zisserman A (2007) Object retrieval with large vocabularies and fast spatial matching. In: Proceedings of CVPR","DOI":"10.1109\/CVPR.2007.383172"},{"key":"77_CR43","doi-asserted-by":"crossref","unstructured":"Philbin J, Chum O, Isard M, Sivic J, Zisserman A (2008) Lost in quantization: improving particular object retrieval in large scale image databases. In: Proceedings of CVPR","DOI":"10.1109\/CVPR.2008.4587635"},{"key":"77_CR44","doi-asserted-by":"crossref","unstructured":"S\u00e1nchez J, Perronnin F (2011) High-dimensional signature compression for large-scale image classification. In: Proceedings of CVPR","DOI":"10.1109\/CVPR.2011.5995504"},{"issue":"4","key":"77_CR45","doi-asserted-by":"crossref","first-page":"754","DOI":"10.1109\/TPAMI.2010.133","volume":"33","author":"F Schroff","year":"2011","unstructured":"Schroff F, Criminisi A, Zisserman A (2011) Harvesting image databases from the web. IEEE PAMI 33(4):754\u2013766","journal-title":"IEEE PAMI"},{"key":"77_CR46","doi-asserted-by":"crossref","unstructured":"Sivic J, Everingham M, Zisserman A (2009) \u201cWho are you?\u201d\u2014learning person specific classifiers from video. In: Proceedings of CVPR","DOI":"10.1109\/CVPR.2009.5206513"},{"key":"77_CR47","first-page":"1470","volume":"2","author":"J Sivic","year":"2003","unstructured":"Sivic J, Zisserman A (2003) Video Google: a text retrieval approach to object matching in videos. Proc ICCV 2:1470\u20131477","journal-title":"Proc ICCV"},{"issue":"4","key":"77_CR48","doi-asserted-by":"crossref","first-page":"548","DOI":"10.1109\/JPROC.2008.916343","volume":"96","author":"J Sivic","year":"2008","unstructured":"Sivic J, Zisserman A (2008) Efficient visual search for objects in videos. Proc IEEE 96(4):548\u2013566","journal-title":"Proc IEEE"},{"key":"77_CR49","doi-asserted-by":"crossref","unstructured":"Tapaswi M, Bauml M, Stiefelhagen R (2014) Story-based video retrieval in TV series using plot synopses. In: ACM international conference on multimedia retrieval (ICMR)","DOI":"10.1145\/2578726.2578727"},{"key":"77_CR50","unstructured":"Tolias G, J\u00e9gou H (2013) Local visual query expansion: exploiting an image collection to refine local descriptors. Technical report RR-8325, INRIA"},{"key":"77_CR51","doi-asserted-by":"crossref","unstructured":"Tolias G, J\u00e9gou H (2014) Visual query expansion with or without geometry: refining local descriptors by feature aggregation. Pattern Recognit","DOI":"10.1016\/j.patcog.2014.04.007"},{"key":"77_CR52","doi-asserted-by":"crossref","unstructured":"Torresani L, Szummer M, Fitzgibbon A (2010) Efficient object category recognition using classemes. In: Proceedings of ECCV, pp 776\u2013789","DOI":"10.1007\/978-3-642-15549-9_56"},{"key":"77_CR53","doi-asserted-by":"crossref","unstructured":"Zeiler MD, Fergus R (2014) Visualizing and understanding convolutional networks. In: Proceedings of ECCV 2014, vol 8689. Springer, pp 818\u2013833","DOI":"10.1007\/978-3-319-10590-1_53"},{"issue":"2","key":"77_CR54","doi-asserted-by":"crossref","first-page":"6","DOI":"10.1145\/1132956.1132959","volume":"38","author":"J Zobel","year":"2006","unstructured":"Zobel J, Moffat A (2006) Inverted files for text search engines. ACM Comput Surv 38(2):6","journal-title":"ACM Comput Surv"}],"container-title":["International Journal of Multimedia Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-015-0077-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s13735-015-0077-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-015-0077-0","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,21]],"date-time":"2025-05-21T08:14:31Z","timestamp":1747815271000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s13735-015-0077-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,3,22]]},"references-count":54,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2015,6]]}},"alternative-id":["77"],"URL":"https:\/\/doi.org\/10.1007\/s13735-015-0077-0","relation":{},"ISSN":["2192-6611","2192-662X"],"issn-type":[{"type":"print","value":"2192-6611"},{"type":"electronic","value":"2192-662X"}],"subject":[],"published":{"date-parts":[[2015,3,22]]}}}