{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,21]],"date-time":"2025-12-21T07:11:34Z","timestamp":1766301094114},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2013,1,4]],"date-time":"2013-01-04T00:00:00Z","timestamp":1357257600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Int J Multimed Info Retr"],"published-print":{"date-parts":[[2013,6]]},"DOI":"10.1007\/s13735-012-0028-y","type":"journal-article","created":{"date-parts":[[2013,1,3]],"date-time":"2013-01-03T08:14:47Z","timestamp":1357200887000},"page":"131-144","source":"Crossref","is-referenced-by-count":14,"title":["Beyond audio and video retrieval: topic-oriented multimedia summarization"],"prefix":"10.1007","volume":"2","author":[{"given":"Florian","family":"Metze","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Duo","family":"Ding","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ehsan","family":"Younessian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexander","family":"Hauptmann","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2013,1,4]]},"reference":[{"key":"28_CR1","doi-asserted-by":"crossref","unstructured":"Banerjee S, Rudnicky AI (2008) An extractive-summarization baseline for the automatic detection of noteworthy utterances in multi-party human-human dialog. In: Proceedings of spoken language technology (SLT). IEEE, Goa","DOI":"10.1109\/SLT.2008.4777869"},{"key":"28_CR2","doi-asserted-by":"crossref","unstructured":"Bao L, Cao J, Zhang Y, Li J, Chen MY, Hauptmann AG (2010) Explicit and implicit concept-based video retrieval with bipartite graph propagation model. In: Proceedings of the international conference on multimedia (ACM MM \u201910). ACM, New York","DOI":"10.1145\/1873951.1874118"},{"key":"28_CR3","unstructured":"Bao L, Yu SI, Lan ZZ, Overwijk A, Jin Q, Langner B, Garbus M, Burger S, Metze F, Hauptmann A (2011) Informedia @ TrecVID 2011. In: Proceedings of TrecVID workshop. NIST, Gaithersburg"},{"key":"28_CR4","doi-asserted-by":"crossref","unstructured":"Chen FR, Withgott MM (1992) The use of emphasis to automatically summarize a spoken discourse. In: Proceedings of ICASSP. IEEE, San Francisco","DOI":"10.1109\/ICASSP.1992.225930"},{"key":"28_CR5","doi-asserted-by":"crossref","unstructured":"Christel MG (2006) Evaluation and user studies with respect to video summarization and browsing. In: Proceedings of multimedia content analysis, management, and retrieval. IS &T\/SPIE Symposium on Electronic Imaging, San Jose","DOI":"10.1117\/12.642841"},{"key":"28_CR6","volume-title":"Automated metadata in multimedia information systems: creation, refinement, use in surrogates, and evaluation","author":"MG Christel","year":"2009","unstructured":"Christel MG (2009) Automated metadata in multimedia information systems: creation, refinement, use in surrogates, and evaluation. Morgan and Claypool, San Rafael"},{"key":"28_CR7","unstructured":"Ding D, Metze F, Rawat S, Schulam PF, Burger S (2012) Generating natural language summaries for multimedia. In: Proceedings of 7th international natural language generation conference. ACL, Starved Rock"},{"key":"28_CR8","doi-asserted-by":"crossref","unstructured":"Ding D, Metze F, Rawat S, Schulam PF, Burger S, Younessian E, Bao L, Christel MG, Hauptmann A (2012) Beyond audio and video retrieval: towards multimedia summarization. In: Proceedings of ICMR. ACM, Hong Kong","DOI":"10.1145\/2324796.2324799"},{"key":"28_CR9","unstructured":"Do Q, Roth D, Sammons M, Tu Y, Vydiswaran VV (2009) Robust, light-weight approaches to compute lexical similarity. Technical report, University of Illinois. Computer Science Research and Technical Reports"},{"key":"28_CR10","doi-asserted-by":"crossref","unstructured":"Dumont E, M\u00e9rialdo B (2009) Automatic evaluation method for rushes summary content. In: Proceedings of the 2009 IEEE international conference on multimedia and expo, ICME\u201909. IEEE Press","DOI":"10.1109\/ICME.2009.5202584"},{"issue":"4","key":"28_CR11","doi-asserted-by":"crossref","first-page":"401","DOI":"10.1109\/TSA.2004.828699","volume":"12","author":"S Furui","year":"2004","unstructured":"Furui S, Kikuchi T, Shinnaka Y, Hori C (2004) Speech-to-text and speech-to-speech summarization of spontaneous speech. IEEE Trans Speech Audio Process 12(4):401","journal-title":"IEEE Trans Speech Audio Process"},{"key":"28_CR12","doi-asserted-by":"crossref","unstructured":"Hauptmann AG, Christel MG, Lin WH, Maher B, Yang J, Baron RV, Xiang G (2007) Clever clustering vs. simple speed-up for summarizing rushes. In: Proceedings of TRECVID video summarization workshop (TVS \u201907). NIST","DOI":"10.1145\/1290031.1290034"},{"issue":"1","key":"28_CR13","first-page":"15","volume":"87D","author":"C Hori","year":"2004","unstructured":"Hori C, Furui S (2004) Speech summarization: an approach through word extraction and a method for evaluation. IEICE Trans Inf Syst E 87D(1):15\u201325","journal-title":"IEICE Trans Inf Syst E"},{"key":"28_CR14","doi-asserted-by":"crossref","unstructured":"Jin Q, Schulam PF, Rawat S, Burger S, Ding D, Metze F (2012) Event-based video retrieval using audio. In: Proceedings of INTERSPEECH. ISCA, Portland","DOI":"10.21437\/Interspeech.2012-556"},{"key":"28_CR15","unstructured":"Kolb P (2009) Experiments on the difference between semantic similarity and relatedness. In: Proceedings of 17th Nordic conference on computational linguistics, NODALIDA \u201909. Odense, Denmark"},{"key":"28_CR16","unstructured":"Langner B, Black A (2009) Mountain: a translation-based approach to natural language generation for dialog systems. In: Proceedings of IWSDS. Irsee, Germany"},{"key":"28_CR17","unstructured":"Li H, Bao L, Gao Z, Overwijk A, Liu W, Zhang LF, Yu SI, Chen MY, Metze F, Hauptmann A (2010) Informedia @ TrecVID 2010. In: Proceedings of 2010 TrecVID Workshop. NIST, Gaithersburg"},{"key":"28_CR18","doi-asserted-by":"crossref","unstructured":"Li Y, Merialdo B (2010) Multi-video summarization based on av-mmr. In: Proceedings of 2010 international workshop on content-based multimedia indexing, pp 1\u20136","DOI":"10.1109\/CBMI.2010.5529899"},{"key":"28_CR19","doi-asserted-by":"crossref","unstructured":"Li Y, Merialdo B (2010) Vert: automatic evaluation of video summaries. In: Proceedings of the international conference on multimedia, MM \u201910. ACM, New York","DOI":"10.1145\/1873951.1874095"},{"key":"28_CR20","doi-asserted-by":"crossref","unstructured":"Liu F, Liu Y (2010) Using spoken utterance compression for meeting summarization: a pilot study. In: Proceedings of spoken language technology. IEEE","DOI":"10.1109\/SLT.2010.5700819"},{"key":"28_CR21","unstructured":"Malkin RG (2007) Multimodal technologies for perception of humans. The CLEAR 2006 CMU acoustic environment classification system. Springer, Berlin"},{"issue":"6","key":"28_CR22","doi-asserted-by":"crossref","first-page":"616","DOI":"10.1016\/j.ipm.2009.05.007","volume":"45","author":"G Marchionini","year":"2009","unstructured":"Marchionini G, Song Y, Ferrell R (2009) Multimedia surrogates for video gisting: toward combining spoken words and imagery. Inf Process Manag 45(6):616\u2013630","journal-title":"Inf Process Manag"},{"key":"28_CR23","unstructured":"National Institute of Science and Technology: Guidelines for TRECVID (2012) http:\/\/www-nlpir.nist.gov\/projects\/tv2012\/tv2012.htmlmer"},{"key":"28_CR24","doi-asserted-by":"crossref","unstructured":"Nenkova A (2006) Summarization evaluation for text and speech: issues and approaches. In: Proceedings of INTERSPEECH. ISCA, Pittsburgh","DOI":"10.21437\/Interspeech.2006-429"},{"key":"28_CR25","doi-asserted-by":"crossref","unstructured":"Nguyen C, Niu Y, Liu F (2012) Video summagator: an interface for video summarization and navigation. In: Proceedings of the SIGCHI conference on human factors in computing systems, CHI \u201912. ACM, New York","DOI":"10.1145\/2207676.2207767"},{"key":"28_CR26","unstructured":"NIST Information Technology Laboratory: 2011 TRECVID Multimedia Event Detection Track (2011) http:\/\/www.nist.gov\/itl\/iad\/mig\/med11.cfm"},{"key":"28_CR27","unstructured":"Objectbank. http:\/\/vision.stanford.edu\/projects\/objectbank\/"},{"key":"28_CR28","unstructured":"Over P (2011) Guidelines for trecvid 2011. http:\/\/www-nlpir.nist.gov\/projects\/tv2011\/tv2011.htmlsin . NIST"},{"key":"28_CR29","doi-asserted-by":"crossref","unstructured":"Shen EYT, Lieberman H, Davenport G (2009) What\u2019s next? Emergent storytelling from video collection. In: Proceedings of the SIGCHI conference on human factors in computing systems, CHI \u201909. ACM, New York","DOI":"10.1145\/1518701.1518825"},{"key":"28_CR30","doi-asserted-by":"crossref","unstructured":"Snoek CG, Worring M (2008) Concept-based video retrieval. Found Trends Inf Retrieval 2(4):215\u2013322","DOI":"10.1561\/1500000014"},{"key":"28_CR31","doi-asserted-by":"crossref","unstructured":"Song Y, Marchionini G, Oh CY (2010) What are the most eye-catching and ear-catching features in the video?: implications for video summarization. In: Rappa M, Jones P, Freire J, Chakrabarti S (eds) Proc. WWW. ACM, New York","DOI":"10.1145\/1772690.1772783"},{"key":"28_CR32","doi-asserted-by":"crossref","unstructured":"Tan CC, Jiang YG, Ngo CW (2011) Towards textually describing complex video contents with audio-visual concept classifiers. In: Proceedings of ACM multiMedia. ACM, Scottsdale","DOI":"10.1145\/2072298.2072411"},{"issue":"1","key":"28_CR33","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1198302.1198305","volume":"3","author":"BT Truong","year":"2007","unstructured":"Truong BT, Venkatesh S (2007) Video abstraction: a systematic review and classification. ACM Trans Multimedia Comput Commun Appl 3(1):1\u201337","journal-title":"ACM Trans Multimedia Comput Commun Appl"},{"key":"28_CR34","doi-asserted-by":"crossref","unstructured":"Ushiku Y, Harada T, Kuniyashi Y (2011) Understanding images with natural sentences. In: Proceedings of ACM multiMedia. ACM, Scottsdale","DOI":"10.1145\/2072298.2072417"},{"key":"28_CR35","unstructured":"Valenza R, Robinson T, Hickey M, Tucker R (1999) Summarization of spoken audio through information extraction. In: Proceedings of ESCA workshop on accessing information in spoken audio, pp 111\u2013116"},{"key":"28_CR36","doi-asserted-by":"crossref","unstructured":"Wang F, Merialdo B (2009) Multi-document video summarization. In: Proceedings of the 2009 IEEE international conference on multimedia and expo, ICME\u201909. IEEE Press","DOI":"10.1109\/ICME.2009.5202747"},{"key":"28_CR37","doi-asserted-by":"crossref","unstructured":"Westman S (2010) Research and advanced technology for digital libraries. In: Lecture notes in computer science, vol 6273. Evaluation constructs for visual video summaries. Springer, Berlin","DOI":"10.1007\/978-3-642-15464-5_9"},{"key":"28_CR38","doi-asserted-by":"crossref","unstructured":"Zsombori V, Frantzis M, Guimaraes RL, Ursu MF, Cesar P, Kegel I, Craigie R, Bulterman DC (2011) Automatic generation of video narratives from shared ugc. In: Proceedings of the 22nd ACM conference on Hypertext and hypermedia, HT \u201911. ACM, New York","DOI":"10.1145\/1995966.1996009"}],"container-title":["International Journal of Multimedia Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-012-0028-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s13735-012-0028-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-012-0028-y","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,2,5]],"date-time":"2022-02-05T03:40:46Z","timestamp":1644032446000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s13735-012-0028-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,1,4]]},"references-count":38,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2013,6]]}},"alternative-id":["28"],"URL":"https:\/\/doi.org\/10.1007\/s13735-012-0028-y","relation":{},"ISSN":["2192-6611","2192-662X"],"issn-type":[{"value":"2192-6611","type":"print"},{"value":"2192-662X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013,1,4]]}}}