{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T18:41:35Z","timestamp":1750531295582,"version":"3.37.3"},"publisher-location":"Berlin, Heidelberg","reference-count":48,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642128998"},{"type":"electronic","value":"9783642129001"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010]]},"DOI":"10.1007\/978-3-642-12900-1_14","type":"book-chapter","created":{"date-parts":[[2010,4,19]],"date-time":"2010-04-19T14:38:27Z","timestamp":1271687907000},"page":"357-386","source":"Crossref","is-referenced-by-count":9,"title":["YouTube Scale, Large Vocabulary Video Annotation"],"prefix":"10.1007","author":[{"given":"Nicholas","family":"Morsillo","sequence":"first","affiliation":[]},{"given":"Gideon","family":"Mann","sequence":"additional","affiliation":[]},{"given":"Christopher","family":"Pal","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"issue":"1","key":"14_CR1","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1145\/1327452.1327494","volume":"51","author":"A. Andoni","year":"2008","unstructured":"Andoni, A., Indyk, P.: Near-optimal hashing algorithms for approximate nearest neighbor in high dimensions. Communications of the ACM\u00a051(1), 117\u2013122 (2008)","journal-title":"Communications of the ACM"},{"key":"14_CR2","doi-asserted-by":"publisher","first-page":"895","DOI":"10.1145\/1367497.1367618","volume-title":"WWW 2008: Proceeding of the 17th international conference on World Wide Web","author":"S. Baluja","year":"2008","unstructured":"Baluja, S., Seth, R., Sivakumar, D., Jing, Y., Yagnik, J., Kumar, S., Ravichandran, D., Aly, M.: Video suggestion and discovery for youtube: taking random walks through the view graph. In: WWW 2008: Proceeding of the 17th international conference on World Wide Web, pp. 895\u2013904. ACM, New York (2008)"},{"key":"14_CR3","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1145\/502512.502546","volume-title":"Proceedings of the seventh ACM SIGKDD international conference on Knowledge discovery and data mining","author":"E. Bingham","year":"2001","unstructured":"Bingham, E., Mannila, H.: Random projection in dimensionality reduction: applications to image and text data. In: Proceedings of the seventh ACM SIGKDD international conference on Knowledge discovery and data mining, pp. 245\u2013250. ACM, New York (2001)"},{"key":"14_CR4","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1109\/34.910878","volume":"23","author":"A. Bobick","year":"2001","unstructured":"Bobick, A., Davis, J.W.: The recognition of human movement using temporal templates. IEEE PAMI\u00a023, 257\u2013267 (2001)","journal-title":"IEEE PAMI"},{"key":"14_CR5","doi-asserted-by":"crossref","unstructured":"Boiman, O., Shechtman, E., Irani, M.: In defense of nearest-neighbor based image-classification. In: Computer Vision and Pattern Recognition (2008)","DOI":"10.1109\/CVPR.2008.4587598"},{"key":"14_CR6","doi-asserted-by":"crossref","unstructured":"Crandall, D., Backstrom, L., Huttenlocher, D., Kleinberg, J.: Mapping the world\u2019s photos. In: WWW (2009)","DOI":"10.1145\/1526709.1526812"},{"key":"14_CR7","first-page":"886","volume":"1","author":"N. Dalal","year":"2005","unstructured":"Dalal, N., Triggs, B.: Histograms of oriented gradients for human detection. Computer Vision and Pattern Recognition\u00a01, 886\u2013893 (2005)","journal-title":"Computer Vision and Pattern Recognition"},{"key":"14_CR8","unstructured":"Dasgupta, S., Gupta, A.: An elementary proof of the Johnson-Lindenstrauss lemma. Tech. Rep. TR-99-06, Intl. Comput. Sci. Inst. (1999)"},{"key":"14_CR9","unstructured":"Dean, J., Ghemawat, S.: MapReduce: Simplified Data Processing on Large Clusters. In: OSDI (2004)"},{"key":"14_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1007\/3-540-47979-1_7","volume-title":"Computer Vision - ECCV 2002","author":"P. Duygulu","year":"2002","unstructured":"Duygulu, P., Barnard, K., de Freitas, J.F.G., Forsyth, D.A.: Object recognition as machine translation: Learning a lexicon for a fixed image vocabulary. In: Heyden, A., Sparr, G., Nielsen, M., Johansen, P. (eds.) ECCV 2002, Part IV. LNCS, vol.\u00a02353, pp. 97\u2013112. Springer, Heidelberg (2002)"},{"key":"14_CR11","unstructured":"Everingham, M., Van Gool, L., Williams, C.K.I., Winn, J., Zisserman, A.: The PASCAL Visual Object Classes Challenge (VOC 2008) Results (2008), http:\/\/www.pascal-network.org\/challenges\/VOC\/voc2008\/workshop\/index.html"},{"issue":"4","key":"14_CR12","doi-asserted-by":"publisher","first-page":"594","DOI":"10.1109\/TPAMI.2006.79","volume":"28","author":"L. Fei-Fei","year":"2006","unstructured":"Fei-Fei, L., Fergus, R., Perona, P.: One-shot learning of object categories. IEEE Transactions on Pattern Analysis and Machine Intelligence\u00a028(4), 594\u2013611 (2006)","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"14_CR13","first-page":"524","volume":"2","author":"L. Fei-Fei","year":"2005","unstructured":"Fei-Fei, L., Perona, P.: A bayesian hierarchical model for learning natural scene categories. Computer Vision and Pattern Recognition\u00a02, 524\u2013531 (2005)","journal-title":"Computer Vision and Pattern Recognition"},{"key":"14_CR14","doi-asserted-by":"crossref","unstructured":"Feng, S., Manmatha, R., Lavrenko, V.: Multiple bernoulli relevance models for image and video annotation. In: Computer Vision and Pattern Recognition (2004)","DOI":"10.1109\/CVPR.2004.1315274"},{"issue":"3","key":"14_CR15","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1145\/355744.355745","volume":"3","author":"J. Friedman","year":"1977","unstructured":"Friedman, J., Bentley, J., Finkel, R.: An algorithm for finding best matches in logarithmic expected time. ACM transactions on mathematical software\u00a03(3), 209\u2013226 (1977)","journal-title":"ACM transactions on mathematical software"},{"key":"14_CR16","unstructured":"Gionis, A., Indyk, P., Motwani, R.: Similarity search in high dimensions via hashing. In: Proc. 25th Internat. Conf. on Very Large Data Bases (1999)"},{"key":"14_CR17","unstructured":"Griffin, G., Holub, A., Perona, P.: Caltech-256 object category dataset. Technical Report 7694, California Institute of Technology (2007)"},{"issue":"2","key":"14_CR18","first-page":"10","volume":"28","author":"M. Grubinger","year":"2006","unstructured":"Grubinger, M., Clough, P.D., Leung, C.: The iapr tc-12 benchmark for visual information search. IAPR Newsletter\u00a028(2), 10\u201312 (2006)","journal-title":"IAPR Newsletter"},{"key":"14_CR19","doi-asserted-by":"publisher","first-page":"668","DOI":"10.1145\/1027527.1027681","volume-title":"Proceedings of the 12th annual ACM international conference on Multimedia","author":"A. Hauptmann","year":"2004","unstructured":"Hauptmann, A., Christel, M.: Successful approaches in the TREC video retrieval evaluations. In: Proceedings of the 12th annual ACM international conference on Multimedia, pp. 668\u2013675. ACM, New York (2004)"},{"key":"14_CR20","doi-asserted-by":"crossref","unstructured":"Hays, J., Efros, A.: Scene Completion Using Millions of Photographs. ACM Transactions on Graphics (SIGGRAPH 2007)\u00a026(3) (2007)","DOI":"10.1145\/1276377.1276382"},{"key":"14_CR21","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1145\/1101149.1101154","volume-title":"Proceedings of the 13th annual ACM international conference on Multimedia","author":"G. Iyengar","year":"2005","unstructured":"Iyengar, G., Duygulu, P., Feng, S., Ircing, P., Khudanpur, S., Klakow, D., Krause, M., Manmatha, R., Nock, H., Petkova, D., et al.: Joint visual-text modeling for automatic retrieval of multimedia documents. In: Proceedings of the 13th annual ACM international conference on Multimedia, pp. 21\u201330. ACM, New York (2005)"},{"key":"14_CR22","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1145\/860435.860459","volume-title":"SIGIR 2003: Proceedings of the 26th annual international ACM SIGIR conference on Research and development in informaion retrieval","author":"J. Jeon","year":"2003","unstructured":"Jeon, J., Lavrenko, V., Manmatha, R.: Automatic image annotation and retrieval using cross-media relevance models. In: SIGIR 2003: Proceedings of the 26th annual international ACM SIGIR conference on Research and development in informaion retrieval, pp. 119\u2013126. ACM, New York (2003)"},{"key":"14_CR23","doi-asserted-by":"crossref","unstructured":"Laptev, I., Marszalek, M., Schmid, C., Rozenfeld, B.: Learning realistic human actions from movies. In: IEEE Conference on Computer Vision and Pattern Recognition (2008)","DOI":"10.1109\/CVPR.2008.4587756"},{"key":"14_CR24","unstructured":"Liu, T., Moore, A., Gray, A., Yang, K.: An investigation of practical approximate nearest neighbor algorithms. In: Advances in neural information processing systems (2004)"},{"key":"14_CR25","volume-title":"Proceedings of the Eighth IEEE Workshop on Applications of Computer Vision","author":"T. Liu","year":"2007","unstructured":"Liu, T., Rosenberg, C., Rowley, H.: Clustering billions of images with large scale nearest neighbor search. In: Proceedings of the Eighth IEEE Workshop on Applications of Computer Vision. IEEE Computer Society, Washington (2007)"},{"key":"14_CR26","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1145\/1290082.1290117","volume-title":"Proceedings of the international workshop on multimedia information retrieval","author":"A. Loui","year":"2007","unstructured":"Loui, A., Luo, J., Chang, S., Ellis, D., Jiang, W., Kennedy, L., Lee, K., Yanagawa, A.: Kodak\u2019s consumer video benchmark data set: concept definition and annotation. In: Proceedings of the international workshop on multimedia information retrieval, pp. 245\u2013254. ACM, New York (2007)"},{"issue":"2","key":"14_CR27","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"D. Lowe","year":"2004","unstructured":"Lowe, D.: Distinctive image features from scale-invariant keypoints. International Journal of Computer Vision\u00a060(2), 91\u2013110 (2004)","journal-title":"International Journal of Computer Vision"},{"key":"14_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"316","DOI":"10.1007\/978-3-540-88690-7_24","volume-title":"Computer Vision \u2013 ECCV 2008","author":"A. Makadia","year":"2008","unstructured":"Makadia, A., Pavlovic, V., Kumar, S.: A New Baseline for Image Annotation. In: Forsyth, D., Torr, P., Zisserman, A. (eds.) ECCV 2008, Part III. LNCS, vol.\u00a05304, pp. 316\u2013329. Springer, Heidelberg (2008)"},{"key":"14_CR29","doi-asserted-by":"crossref","unstructured":"Matas, J., Chum, O., Martin, U., Pajdla, T.: Robust wide baseline stereo from maximally stable extremal regions. In: BMVC, vol.\u00a01, pp. 384\u2013393 (2002)","DOI":"10.5244\/C.16.36"},{"key":"14_CR30","unstructured":"Messing, R., Pal, C.: Behavior recognition in video with extended models of feature velocity dynamics. Technical report, AAAI Spring Symposium Technical Reports (2009)"},{"key":"14_CR31","unstructured":"Mori, Y., Takahashi, H., Oka, R.: Image-to-word transformation based on dividing and vector quantizing images with words. In: International Workshop on Multimedia Intelligent Storage and Retrieval Management (1999)"},{"key":"14_CR32","doi-asserted-by":"crossref","unstructured":"Nist\u00e9r, D., Stew\u00e9nius, H.: Scalable recognition with a vocabulary tree. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2006, vol.\u00a02, pp. 2161\u20132168 (2006)","DOI":"10.1109\/CVPR.2006.264"},{"issue":"2","key":"14_CR33","first-page":"273","volume":"1","author":"S. Omohundro","year":"1987","unstructured":"Omohundro, S.: Efficient algorithms with neural network behavior. Complex Systems\u00a01(2), 273\u2013347 (1987)","journal-title":"Complex Systems"},{"key":"14_CR34","unstructured":"Over, P., Awad, G., Rose, T., Fiscus, J., Kraaij, W., Smeaton-Alan, A.: TRECVID 2008\u2013Goals, Tasks, Data, Evaluation Mechanisms and Metrics (2008)"},{"key":"14_CR35","unstructured":"Polana, R., Nelson, R.: Detecting activities. In: Computer Vision and Pattern Recognition, CVPR 2003 (2003)"},{"issue":"1","key":"14_CR36","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1007\/s11263-007-0090-8","volume":"77","author":"B. Russell","year":"2008","unstructured":"Russell, B., Torralba, A., Murphy, K., Freeman, W.: LabelMe: a database and web-based tool for image annotation. International Journal of Computer Vision\u00a077(1), 157\u2013173 (2008)","journal-title":"International Journal of Computer Vision"},{"key":"14_CR37","doi-asserted-by":"crossref","unstructured":"Schindler, G., Brown, M., Szeliski, R.: City-scale location recognition. In: IEEE Conf. on Computer Vision and Pattern Recognition, CVPR (2007)","DOI":"10.1109\/CVPR.2007.383150"},{"key":"14_CR38","doi-asserted-by":"crossref","unstructured":"Schuldt, C., Laptev, I., Caputo, B.: Recognizing human actions: A local svm approach. In: International Conference on Pattern Recognition (2004)","DOI":"10.1109\/ICPR.2004.1334462"},{"key":"14_CR39","doi-asserted-by":"crossref","unstructured":"Shakhnarovich, G., Viola, P., Darrell, T.: Fast pose estimation with parameter-sensitive hashing. In: Ninth IEEE International Conference on Computer Vision (2003)","DOI":"10.1109\/ICCV.2003.1238424"},{"key":"14_CR40","doi-asserted-by":"crossref","unstructured":"Sivic, J., Zisserman, A.: Video Google: A text retrieval approach to object matching in videos. In: Ninth IEEE International Conference on Computer Vision (2003)","DOI":"10.1109\/ICCV.2003.1238663"},{"key":"14_CR41","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1145\/1178677.1178722","volume-title":"Proceedings of the 8th ACM international workshop on Multimedia information retrieval","author":"A. Smeaton","year":"2006","unstructured":"Smeaton, A., Over, P., Kraaij, W.: Evaluation campaigns and TRECVid. In: Proceedings of the 8th ACM international workshop on Multimedia information retrieval, pp. 321\u2013330. ACM, New York (2006)"},{"key":"14_CR42","doi-asserted-by":"crossref","unstructured":"Torralba, A., Fergus, R., Freeman, W.T.: 80 million tiny images: a large dataset for non-parametric object and scene recognition. IEEE PAMI (2008)","DOI":"10.1109\/TPAMI.2008.128"},{"issue":"4","key":"14_CR43","doi-asserted-by":"publisher","first-page":"175","DOI":"10.1016\/0020-0190(91)90074-R","volume":"40","author":"J. Uhlmann","year":"1991","unstructured":"Uhlmann, J.: Satisfying general proximity\/similarity queries with metric trees. Information Processing Letters\u00a040(4), 175\u2013179 (1991)","journal-title":"Information Processing Letters"},{"key":"14_CR44","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1007\/978-3-540-79547-6_40","volume-title":"Computer Vision Systems","author":"A. Ulges","year":"2008","unstructured":"Ulges, A., Schulze, C., Keysers, D., Breuel, T.: A System that Learns to Tag Videos by Watching Youtube. In: Gasteratos, A., Vincze, M., Tsotsos, J.K. (eds.) ICVS 2008. LNCS, vol.\u00a05008, p. 415. Springer, Heidelberg (2008)"},{"key":"14_CR45","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1145\/985692.985733","volume-title":"Proceedings of the SIGCHI conference on Human factors in computing systems","author":"L. Ahn Von","year":"2004","unstructured":"Von Ahn, L., Dabbish, L.: Labeling images with a computer game. In: Proceedings of the SIGCHI conference on Human factors in computing systems, pp. 319\u2013326. ACM, New York (2004)"},{"key":"14_CR46","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1145\/1124772.1124782","volume-title":"Proceedings of the SIGCHI conference on Human Factors in computing systems","author":"L. Ahn Von","year":"2006","unstructured":"Von Ahn, L., Liu, R., Blum, M.: Peekaboom: a game for locating objects in images. In: Proceedings of the SIGCHI conference on Human Factors in computing systems, pp. 55\u201364. ACM, New York (2006)"},{"key":"14_CR47","volume-title":"Managing Gigabytes: Compressing and Indexing Documents and Images","author":"I. Witten","year":"1999","unstructured":"Witten, I., Moffat, A., Bell, T.: Managing Gigabytes: Compressing and Indexing Documents and Images. Morgan Kaufmann, San Francisco (1999)"},{"key":"14_CR48","doi-asserted-by":"crossref","unstructured":"Zanetti, S., Zelnik-Manor, L., Perona, P.: A walk through the webs video clips. In: IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops (2008)","DOI":"10.1109\/CVPRW.2008.4562955"}],"container-title":["Studies in Computational Intelligence","Video Search and Mining"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-12900-1_14.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,20]],"date-time":"2025-02-20T02:14:54Z","timestamp":1740017694000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-12900-1_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010]]},"ISBN":["9783642128998","9783642129001"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-12900-1_14","relation":{},"ISSN":["1860-949X","1860-9503"],"issn-type":[{"type":"print","value":"1860-949X"},{"type":"electronic","value":"1860-9503"}],"subject":[],"published":{"date-parts":[[2010]]}}}