{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,17]],"date-time":"2026-02-17T12:40:51Z","timestamp":1771332051166,"version":"3.50.1"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2015,12,24]],"date-time":"2015-12-24T00:00:00Z","timestamp":1450915200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100011039","name":"IARPA","doi-asserted-by":"crossref","award":["D11PC20068"],"award-info":[{"award-number":["D11PC20068"]}],"id":[{"id":"10.13039\/100011039","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61373114"],"award-info":[{"award-number":["61373114"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Multimed Info Retr"],"published-print":{"date-parts":[[2016,3]]},"DOI":"10.1007\/s13735-015-0093-0","type":"journal-article","created":{"date-parts":[[2015,12,24]],"date-time":"2015-12-24T12:23:45Z","timestamp":1450959825000},"page":"3-18","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Text-to-video: a semantic search engine for internet videos"],"prefix":"10.1007","volume":"5","author":[{"given":"Lu","family":"Jiang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shoou-I","family":"Yu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Deyu","family":"Meng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Teruko","family":"Mitamura","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexander G.","family":"Hauptmann","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,12,24]]},"reference":[{"key":"93_CR1","doi-asserted-by":"crossref","unstructured":"Banerjee S, Pedersen T (2002) An adapted lesk algorithm for word sense disambiguation using wordnet. In: CICLing","DOI":"10.1007\/3-540-45715-1_11"},{"key":"93_CR2","doi-asserted-by":"crossref","unstructured":"Bengio Y, Louradour J, Collobert R, Weston J (2009) Curriculum learning. In: ICML","DOI":"10.1145\/1553374.1553380"},{"key":"93_CR3","doi-asserted-by":"crossref","unstructured":"Bhattacharya S, Yu FX, Chang SF (2014) Minimally needed evidence for complex event recognition in unconstrained videos. In: ICMR","DOI":"10.1145\/2578726.2578740"},{"key":"93_CR4","first-page":"27:1","volume":"2","author":"CC Chang","year":"2011","unstructured":"Chang CC, Lin CJ (2011) LIBSVM: a library for support vector machines. ACM TIST 2:27:1\u201327:27","journal-title":"ACM TIST"},{"issue":"1","key":"93_CR5","first-page":"22","volume":"16","author":"KW Church","year":"1990","unstructured":"Church KW, Hanks P (1990) Word association norms, mutual information, and lexicography. Comput Linguist 16(1):22\u201329","journal-title":"Comput Linguist"},{"key":"93_CR6","doi-asserted-by":"crossref","unstructured":"Dalton J, Allan J, Mirajkar P (2013) Zero-shot video retrieval using content and concepts. In: CIKM","DOI":"10.1145\/2505515.2507880"},{"key":"93_CR7","doi-asserted-by":"crossref","unstructured":"Davidson J, Liebald B, Liu J et\u00a0al (2010) The youtube video recommendation system. In: RecSys","DOI":"10.1145\/1864708.1864770"},{"key":"93_CR8","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li LJ, Li K, Fei-Fei L (2009) Imagenet: a large-scale hierarchical image database. In: CVPR","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"93_CR9","doi-asserted-by":"crossref","unstructured":"Gkalelis N, Mezaris V (2014) Video event detection using generalized subclass discriminant analysis and linear support vector machines. In: ICMR","DOI":"10.1145\/2578726.2578745"},{"key":"93_CR10","doi-asserted-by":"crossref","unstructured":"Habibian A, Mensink T, Snoek CG (2014) Composite concept discovery for zero-shot video event detection. In: ICMR","DOI":"10.1145\/2578726.2578746"},{"key":"93_CR11","doi-asserted-by":"crossref","unstructured":"Habibian A, van\u00a0de Sande KE, Snoek CG (2013) Recommendations for video event recognition using concept vocabularies. In: ICMR","DOI":"10.1145\/2461466.2461482"},{"issue":"5","key":"93_CR12","first-page":"958","volume":"9","author":"A Hauptmann","year":"2007","unstructured":"Hauptmann A, Yan R, Lin WH, Christel M, Wactlar H (2007) Can high-level concepts fill the semantic gap in video retrieval? A case study with broadcast news. TMM 9(5):958\u2013966","journal-title":"TMM"},{"key":"93_CR13","doi-asserted-by":"crossref","unstructured":"Inoue N, Shinoda K (2014) n-gram models for video semantic indexing. In: MM","DOI":"10.1145\/2647868.2654961"},{"key":"93_CR14","doi-asserted-by":"crossref","unstructured":"Jiang L, Hauptmann A, Xiang G (2012) Leveraging high-level and low-level features for multimedia event detection. In: MM","DOI":"10.1145\/2393347.2393412"},{"key":"93_CR15","doi-asserted-by":"crossref","unstructured":"Jiang L, Meng D, Mitamura T, Hauptmann AG (2014) Easy samples first: self-paced reranking for zero-example multimedia search. In: MM","DOI":"10.1145\/2647868.2654918"},{"key":"93_CR16","unstructured":"Jiang L, Meng D, Yu SI, Lan Z, Shan S, Hauptmann AG (2014) Self-paced learning with diversity. In: NIPS"},{"key":"93_CR17","doi-asserted-by":"crossref","unstructured":"Jiang L, Meng D, Zhao Q, Shan S, Hauptmann AG (2015) Self-paced curriculum learning. In: AAAI","DOI":"10.1609\/aaai.v29i1.9608"},{"key":"93_CR18","doi-asserted-by":"crossref","unstructured":"Jiang L, Mitamura T, Yu SI, Hauptmann AG (2014) Zero-example event search using multimodal pseudo relevance feedback. In: ICMR","DOI":"10.1145\/2578726.2578764"},{"key":"93_CR19","doi-asserted-by":"crossref","unstructured":"Jiang L, Tong W, Meng D, Hauptmann AG (2014) Towards efficient learning of optimal spatial bag-of-words representations. In: ICMR","DOI":"10.1145\/2578726.2578739"},{"key":"93_CR20","doi-asserted-by":"crossref","unstructured":"Jiang L, Yu SI, Meng D, Mitamura T, Hauptmann AG (2015) Bridging the ultimate semantic gap: a semantic search engine for internet videos. In: ICMR","DOI":"10.1145\/2671188.2749399"},{"key":"93_CR21","doi-asserted-by":"crossref","unstructured":"Jiang L, Yu SI, Meng D, Yang Y, Mitamura T, Hauptmann AG (2015) Fast and accurate content-based semantic search in 100m internet videos. In: MM","DOI":"10.1145\/2733373.2806237"},{"key":"93_CR22","doi-asserted-by":"crossref","unstructured":"Karpathy A, Toderici G, Shetty S, Leung T, Sukthankar R, Fei-Fei L (2014) Large-scale video classification with convolutional neural networks. In: CVPR","DOI":"10.1109\/CVPR.2014.223"},{"key":"93_CR23","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. In: NIPS"},{"key":"93_CR24","unstructured":"Kumar M, Packer B, Koller D (2010) Self-paced learning for latent variable models. In: NIPS"},{"key":"93_CR25","unstructured":"Lee H (2014) Analyzing complex events and human actions in\u201c in-the-wild\u201d videos. In: UMD Ph.D Theses and Dissertations"},{"key":"93_CR26","doi-asserted-by":"crossref","unstructured":"Levy O, Goldberg Y (2014) Dependency-based word embeddings. In: ACL","DOI":"10.3115\/v1\/P14-2050"},{"key":"93_CR27","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511809071","volume-title":"Introduction to information retrieval","author":"CD Manning","year":"2008","unstructured":"Manning CD, Raghavan P, Sch\u00fctze H (2008) Introduction to information retrieval, vol 1. Cambridge University Press, Cambridge"},{"key":"93_CR28","doi-asserted-by":"crossref","unstructured":"Mazloom M, Li X, Snoek CG (2014) Few-example video event retrieval using tag propagation. In: ICMR","DOI":"10.1145\/2578726.2578793"},{"key":"93_CR29","doi-asserted-by":"crossref","unstructured":"Miao Y, Gowayyed M, Metze F (2015) Eesen: end-to-end speech recognition using deep rnn models and wfst-based decoding. arXiv:1507.08240","DOI":"10.1109\/ASRU.2015.7404790"},{"key":"93_CR30","doi-asserted-by":"crossref","unstructured":"Miao Y, Jiang L, Zhang H, Metze F (2014) Improvements to speaker adaptive training of deep neural networks. In: SLT","DOI":"10.1109\/SLT.2014.7078568"},{"key":"93_CR31","doi-asserted-by":"crossref","unstructured":"Miao Y, Metze F (2013) Improving low-resource cd-dnn-hmm using dropout and multilingual dnn training. In: INTERSPEECH","DOI":"10.21437\/Interspeech.2013-526"},{"key":"93_CR32","doi-asserted-by":"crossref","unstructured":"Miao Y, Metze F, Rawat S (2013) Deep maxout networks for low-resource speech recognition. In: ASRU","DOI":"10.1109\/ASRU.2013.6707763"},{"key":"93_CR33","unstructured":"Mikolov T, Sutskever I, Chen K, Corrado GS, Dean J (2013) Distributed representations of words and phrases and their compositionality. In: NIPS"},{"key":"93_CR34","unstructured":"Norouzi M, Mikolov T, Bengio S, Singer Y, Shlens J, Frome, A, Corrado GS, Dean J (2014) Zero-shot learning by convex combination of semantic embeddings. In: ICLR"},{"issue":"1","key":"93_CR35","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1007\/s00138-013-0525-x","volume":"25","author":"S Oh","year":"2014","unstructured":"Oh S, McCloskey S, Kim I, Vahdat A, Cannons KJ, Hajimirsadeghi H, Mori G, Perera AA, Pandey M, Corso JJ (2014) Multimedia event detection with multimodal feature fusion and temporal concept localization. Mach Vision Appl 25(1):49\u201369","journal-title":"Mach Vision Appl"},{"key":"93_CR36","unstructured":"Over P, Awad G, Michel M, Fiscus J, Sanders G, Kraaij W, Smeaton AF, Quenot G (2014) TRECVID 2014\u2014an overview of the goals, tasks, data, evaluation mechanisms and metrics. In: TRECVID"},{"key":"93_CR37","unstructured":"Palatucci M, Pomerleau D, Hinton GE, Mitchell TM (2009) Zero-shot learning with semantic output codes. In: NIPS"},{"key":"93_CR38","unstructured":"Povey D, Ghoshal A, Boulianne G et al (2011) The kaldi speech recognition toolkit. In: ASRU"},{"key":"93_CR39","doi-asserted-by":"crossref","unstructured":"Safadi B, Sahuguet M, Huet B (2014) When textual and visual information join forces for multimedia retrieval. In: ICMR","DOI":"10.1145\/2578726.2578760"},{"key":"93_CR40","unstructured":"Thomee B, Shamma DA, Friedland G, Elizalde B, Ni K, Poland D, Borth D, Li LJ (2015) The new data and new challenges in multimedia research. arXiv:1503.01817"},{"issue":"1","key":"93_CR41","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1007\/s00138-013-0529-6","volume":"25","author":"W Tong","year":"2014","unstructured":"Tong W, Yang Y, Jiang L et al (2014) E-lamp: integration of innovative ideas for multimedia event detection. Mach Vision Appl 25(1):5\u201315","journal-title":"Mach Vision Appl"},{"issue":"3","key":"93_CR42","doi-asserted-by":"crossref","first-page":"480","DOI":"10.1109\/TPAMI.2011.153","volume":"34","author":"A Vedaldi","year":"2012","unstructured":"Vedaldi A, Zisserman A (2012) Efficient additive kernels via explicit feature maps. PAMI 34(3):480\u2013492","journal-title":"PAMI"},{"key":"93_CR43","doi-asserted-by":"crossref","unstructured":"Wang F, Sun Z, Jiang Y, Ngo C (2013) Video event detection using motion relativity and feature selection. In: TMM","DOI":"10.1109\/TMM.2014.2315780"},{"key":"93_CR44","doi-asserted-by":"crossref","unstructured":"Wang H, Schmid C (2013) Action recognition with improved trajectories. In: ICCV","DOI":"10.1109\/ICCV.2013.441"},{"key":"93_CR45","doi-asserted-by":"crossref","unstructured":"Wu S, Bondugula S, Luisier F, Zhuang X, Natarajan P (2014) Zero-shot event detection using multi-modal fusion of weakly supervised concepts. In: CVPR","DOI":"10.1109\/CVPR.2014.341"},{"key":"93_CR46","doi-asserted-by":"crossref","unstructured":"Wu Z, Palmer M (1994) Verbs semantics and lexical selection. In: ACL","DOI":"10.3115\/981732.981751"},{"key":"93_CR47","doi-asserted-by":"crossref","unstructured":"Younessian E, Mitamura T, Hauptmann A (2012) Multimodal knowledge-based analysis in multimedia event detection. In: ICMR","DOI":"10.1145\/2324796.2324855"},{"key":"93_CR48","doi-asserted-by":"crossref","unstructured":"Yu SI, Jiang L, Hauptmann A (2014) Instructional videos for unsupervised harvesting and learning of action examples. In: MM","DOI":"10.1145\/2647868.2654997"},{"key":"93_CR49","doi-asserted-by":"crossref","unstructured":"Yu SI, Jiang L, Xu Z, Yang Y, Hauptmann AG (2015) Content-based video search over 1 million videos with 1 core in 1 second. In: ICMR","DOI":"10.1145\/2671188.2749398"},{"key":"93_CR50","unstructured":"Yu SI, Jiang L, Xu Z et al (2014) Cmu-informedia@trecvid 2014. In: TRECVID"},{"key":"93_CR51","doi-asserted-by":"crossref","unstructured":"Zhai C, Lafferty J (2004) A study of smoothing methods for language models applied to information retrieval. TOIS 22(2)","DOI":"10.1145\/984321.984322"},{"key":"93_CR52","doi-asserted-by":"crossref","unstructured":"Zhao Q, Meng D, Jiang L, Xie Q, Xu Z, Hauptmann AG (2015) Self-paced learning for matrix factorization. In: AAAI","DOI":"10.1609\/aaai.v29i1.9584"}],"container-title":["International Journal of Multimedia Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-015-0093-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s13735-015-0093-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-015-0093-0","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,16]],"date-time":"2023-08-16T08:28:49Z","timestamp":1692174529000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s13735-015-0093-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,12,24]]},"references-count":52,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2016,3]]}},"alternative-id":["93"],"URL":"https:\/\/doi.org\/10.1007\/s13735-015-0093-0","relation":{},"ISSN":["2192-6611","2192-662X"],"issn-type":[{"value":"2192-6611","type":"print"},{"value":"2192-662X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,12,24]]}}}