{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,1,9]],"date-time":"2024-01-09T23:29:51Z","timestamp":1704842991741},"reference-count":50,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2015,11,30]],"date-time":"2015-11-30T00:00:00Z","timestamp":1448841600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Multimed Info Retr"],"published-print":{"date-parts":[[2016,6]]},"DOI":"10.1007\/s13735-015-0090-3","type":"journal-article","created":{"date-parts":[[2015,11,30]],"date-time":"2015-11-30T09:23:52Z","timestamp":1448875432000},"page":"73-88","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["On the use of commonsense ontology for multimedia event recounting"],"prefix":"10.1007","volume":"5","author":[{"given":"Chun-Chet","family":"Tan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chong-Wah","family":"Ngo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,11,30]]},"reference":[{"issue":"11","key":"90_CR1","doi-asserted-by":"crossref","first-page":"1222","DOI":"10.1109\/34.969114","volume":"23","author":"Y Boykov","year":"2001","unstructured":"Boykov Y, Veksler O, Zabih R (2001) Fast approximate energy minimization via graph cuts. IEEE Trans Pattern Anal Mach Intell 23(11):1222\u20131239","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"90_CR2","doi-asserted-by":"crossref","unstructured":"Chua TS, Tang J, Hong R, Li H, Luo Z, Zheng Y (2009) NUS-wide: a real-world web image database from National University of Singapore. In: Proceedings of CIVR, pp 48:1\u201348:9","DOI":"10.1145\/1646396.1646452"},{"issue":"3","key":"90_CR3","doi-asserted-by":"crossref","first-page":"370","DOI":"10.1109\/TKDE.2007.48","volume":"19","author":"RL Cilibrasi","year":"2007","unstructured":"Cilibrasi RL, Vitanyi PMB (2007) The Google similarity distance. IEEE Trans Knowl Data Eng 19(3):370\u2013383","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"90_CR4","doi-asserted-by":"crossref","unstructured":"Dalal N, Triggs B, Schmid C (2006) Human detection using oriented histograms of flow and appearance. In: Proceedings of ECCV, pp 428\u2013441","DOI":"10.1007\/11744047_33"},{"key":"90_CR5","doi-asserted-by":"crossref","unstructured":"Das P, Xu C, Doell RF, Corso JJ (2013) A thousand frames in just a few words: lingual description of videos through latent topics and sparse object stitching. In: Proceedings of CVPR, pp 2634\u20132641","DOI":"10.1109\/CVPR.2013.340"},{"key":"90_CR6","doi-asserted-by":"crossref","unstructured":"Demarty CH, Penet C, Schedl M, Ionescu B, Quang VL, Jiang YG (2013) The MediaEval 2013 affect task: violent scenes detection. In: MediaEval workshop","DOI":"10.1109\/CBMI.2014.6849827"},{"key":"90_CR7","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Jia Li L, Li K, Fei-Fei L (2009) ImageNet: a large-scale hierarchical image database. In: Proceedings of CVPR","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"90_CR8","doi-asserted-by":"crossref","unstructured":"Ding D, Metze F, Rawat S, Schulam PF, Burger S, Younessian E, Bao L, Christel MG, Hauptmann A (2012) Beyond audio and video retrieval: towards multimedia summarization. In: Proceedings of ICMR, pp 2:1\u20132:8","DOI":"10.1145\/2324796.2324799"},{"key":"90_CR9","doi-asserted-by":"crossref","unstructured":"Farhadi A, Hejrati SMM, Sadeghi MA, Young P, Rashtchian C, Hockenmaier J, Forsyth DA (2010) Every picture tells a story: generating sentences from images. In: Proceedings of ECCV, pp 15\u201329","DOI":"10.1007\/978-3-642-15561-1_2"},{"key":"90_CR10","doi-asserted-by":"crossref","unstructured":"Guadarrama S, Krishnamoorthy N, Malkarnenkar G, Venugopalan S, Mooney RJ, Darrell T, Saenko K (2013) YouTube2Text: recognizing and describing arbitrary activities using semantic hierarchies and zero-shot recognition. In: Proceedings of ICCV, pp 2712\u20132719","DOI":"10.1109\/ICCV.2013.337"},{"key":"90_CR11","unstructured":"Gupta A, Verma Y, Jawahar CV (2012) Choosing linguistics over vision to describe images. In: Proceedings of AAAI"},{"key":"90_CR12","doi-asserted-by":"crossref","unstructured":"Izadinia H, Shah M (2012) Recognizing complex events using large margin joint low-level event model. In: Proceedings of ECCV, pp 430\u2013444","DOI":"10.1007\/978-3-642-33765-9_31"},{"issue":"6","key":"90_CR13","doi-asserted-by":"crossref","first-page":"3080","DOI":"10.1109\/TIP.2012.2188038","volume":"21","author":"YG Jiang","year":"2012","unstructured":"Jiang YG, Dai Q, Wang J, Ngo CW, Xue X, Chang SF (2012) Fast semantic diffusion for large scale context-based image and video annotation. IEEE Trans Image Process 21(6):3080\u20133091","journal-title":"IEEE Trans Image Process"},{"key":"90_CR14","doi-asserted-by":"crossref","unstructured":"Jiang YG, Ngo CW, Chang SF (2009) Semantic context transfer across heterogeneous sources for domain adaptive video search. In: Proceedings of ACM MM, pp 155\u2013164","DOI":"10.1145\/1631272.1631296"},{"key":"90_CR15","doi-asserted-by":"crossref","unstructured":"Jiang YG, Ye G, Chang SF, Ellis D, Loui AC (2011) Consumer video understanding: a benchmark database and an evaluation of human and machine performance. In: Proceedings of ICMR","DOI":"10.1145\/1991996.1992025"},{"key":"90_CR16","doi-asserted-by":"crossref","unstructured":"Khan MUG, Zhang L, Gotoh Y (2011) Towards coherent natural language description of video streams. In: ICCV workshops, pp 664\u2013671","DOI":"10.1109\/ICCVW.2011.6130306"},{"key":"90_CR17","doi-asserted-by":"crossref","unstructured":"Krishnamoorthy N, Malkarnenkar G, Mooney RJ, Saenko K, Guadarrama S (2013) Generating natural-language video descriptions using text-mined knowledge. In: Proceedings of AAAI","DOI":"10.1609\/aaai.v27i1.8679"},{"key":"90_CR18","doi-asserted-by":"crossref","unstructured":"Kulkarni G, Premraj V, Dhar S, Li S, Choi Y, Berg AC, Berg TL (2011) Baby talk: understanding and generating simple image descriptions. In: Proceedings of CVPR, pp 1601\u20131608","DOI":"10.1109\/CVPR.2011.5995466"},{"key":"90_CR19","unstructured":"Kuznetsova P, Ordonez V, Berg AC, Berg TL, Choi Y (2012) Collective generation of natural image descriptions. In: Proceedings of ACL, pp 359\u2013368"},{"key":"90_CR20","unstructured":"Li S, Kulkarni G, Berg TL, Berg AC, Choi Y (2011) Composing simple image descriptions using web-scale n-grams. In: Proceedings of CoNLL, pp 220\u2013228"},{"key":"90_CR21","unstructured":"Lin Y, Michel JB, Aiden EL, Orwant J, Brockman W, Petrov S (2012) Syntactic annotations for the google books n-gram corpus. In: Proceedings of ACL, pp 169\u2013174"},{"issue":"4","key":"90_CR22","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1023\/B:BTTJ.0000047600.45421.6d","volume":"22","author":"H Liu","year":"2004","unstructured":"Liu H, Singh P (2004) Conceptnet\u2014a practical commonsense reasoning tool-kit. BT Technol J 22(4):211\u2013226","journal-title":"BT Technol J"},{"key":"90_CR23","doi-asserted-by":"crossref","unstructured":"Liu J, Yu Q, Javed O, Ali S, Tamrakar A, Divakaran A, Cheng H, Sawhney HS (2013) Video event recognition using concept attributes. In: Proceedings of WACV, pp 339\u2013346","DOI":"10.1109\/WACV.2013.6475038"},{"key":"90_CR24","doi-asserted-by":"crossref","unstructured":"Ma Z, Hauptmann AG, Yang Y, Sebe N (2012) Classifier-specific intermediate representation for multimedia tasks. In: Proceedings of ICMR, pp 50:1\u201350:8","DOI":"10.1145\/2324796.2324854"},{"key":"90_CR25","doi-asserted-by":"crossref","unstructured":"Marsza\u0142ek M, Laptev I, Schmid C (2009) Actions in context. In: Proceedings of CVPR, pp 2929\u20132936","DOI":"10.1109\/CVPR.2009.5206557"},{"key":"90_CR26","unstructured":"Mathieu B, Essid S, Fillon T, Prado J, Richard G (2010) Yaafe, an easy to use and efficient audio feature extraction software. In: Downie JS, Veltkamp RC (eds) Proceedings of ISMIR, pp 441\u2013446"},{"key":"90_CR27","doi-asserted-by":"crossref","unstructured":"Mazloom M, Gavves E, van\u00a0de Sande KEA, Snoek C (2013) Searching informative concept banks for video event detection. In: Proceedings of ICMR, pp 255\u2013262","DOI":"10.1145\/2461466.2461507"},{"issue":"1","key":"90_CR28","doi-asserted-by":"crossref","first-page":"88","DOI":"10.1109\/TMM.2011.2168948","volume":"14","author":"M Merler","year":"2012","unstructured":"Merler M, Huang B, Xie L, Hua G, Natsev A (2012) Semantic model vectors for complex video event recognition. IEEE Trans Multimed 14(1):88\u2013101","journal-title":"IEEE Trans Multimed"},{"key":"90_CR29","unstructured":"Mitchell M, Han X, Dodge J, Mensch A, Goyal A, Berg A, Yamaguchi K, Berg T, Stratos K, Daum\u00e9 III H (2012) Midge: generating image descriptions from computer vision detections. In: Proceedings of EACL, pp 747\u2013756"},{"key":"90_CR30","unstructured":"Natarajan P, Wu S, Luisier F, Zhuang X, Tickoo M, Ye G, Liu D, Chang SF, Saleemi I, Shah M, Davis L, Gupta A, Haritaoglu I, Guler S, Morde A (2013) BBN VISER TRECVID 2013 multimedia event detection and multimedia event recounting systems. In: NIST TRECVID workshop"},{"key":"90_CR31","unstructured":"Nie F, Huang Y, Wang X, Huang H (2014) New primal SVM solver with linear computational cost for big data classifications. In: Proceedings of ICML"},{"key":"90_CR32","unstructured":"NIST, Information Technology Laboratory: 2012 TRECVID Multimedia Event Detection Track"},{"key":"90_CR33","unstructured":"NIST, Information Technology Laboratory: 2013 TRECVID Multimedia Event Recounting Track"},{"key":"90_CR34","unstructured":"Ordonez V, Kulkarni G, Berg TL (2011) Im2Text: describing images using 1 million captioned photographs. In: Proceedings of NIPS, pp 1143\u20131151"},{"key":"90_CR35","doi-asserted-by":"crossref","unstructured":"Rohrbach M, Qiu W, Titov I, Thater S, Pinkal M, Schiele B (2013) Translating video content to natural language descriptions. In: Proceedings of ICCV, pp 433\u2013440","DOI":"10.1109\/ICCV.2013.61"},{"issue":"411","key":"90_CR36","doi-asserted-by":"crossref","first-page":"686","DOI":"10.1080\/01621459.1990.10474928","volume":"85","author":"J Romano","year":"1990","unstructured":"Romano J (1990) On the behavior of randomization tests without a group invariance assumption. J Am Stat Assoc 85(411):686\u2013692","journal-title":"J Am Stat Assoc"},{"key":"90_CR37","doi-asserted-by":"crossref","unstructured":"Snoek CGM, Worring M, van Gemert JC, Geusebroek JM, Smeulders AWM (2006) The challenge problem for automated detection of 101 semantic concepts in multimedia. In: Proceedings of ACM MM, pp 421\u2013430","DOI":"10.1145\/1180639.1180727"},{"key":"90_CR38","unstructured":"Speer R, Havasi C, Lieberman H (2008) Analogyspace: reducing the dimensionality of common sense knowledge. In: Proceedings of AAAI, pp 548\u2013553"},{"key":"90_CR39","doi-asserted-by":"crossref","unstructured":"Sun C, Burns B, Nevatia R, Snoek CGM, Bolles B, Myers GK, Wang W, Yeh E (2014) Isomer: informative segment observations for multimedia event recounting. In: Proceedings of ICMR","DOI":"10.1145\/2578726.2578757"},{"key":"90_CR40","doi-asserted-by":"crossref","unstructured":"Tan CC, Jiang YG, Ngo CW (2011) Towards textually describing complex video contents with audio\u2013visual concept classifiers. In: Proceedings of ACM MM, pp 655\u2013658","DOI":"10.1145\/2072298.2072411"},{"key":"90_CR41","unstructured":"Tan CC, Ngo CW (2013) The vireo team at MediaEval 2013: violent scenes detection by mid-level concepts learnt from youtube. In: MediaEval, Proceedings of CEUR workshop, vol 1043"},{"issue":"3","key":"90_CR42","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1145\/1666420.1666446","volume":"53","author":"A Torralba","year":"2010","unstructured":"Torralba A, Murphy KP, Freeman WT (2010) Using the forest to see the trees: exploiting context for visual object detection and localization. Commun ACM 53(3):107\u2013114","journal-title":"Commun ACM"},{"key":"90_CR43","doi-asserted-by":"crossref","unstructured":"Verma Y, Gupta A, Mannem P, Jawahar CV (2013) Generating image descriptions using semantic similarities in the output space. In: CVPR workshops, pp 288\u2013293","DOI":"10.1109\/CVPRW.2013.50"},{"key":"90_CR44","doi-asserted-by":"crossref","unstructured":"Vinyals O, Toshev A, Bengio S, Erhan D (2014) Show and tell: a neural image caption generator. CoRR","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"90_CR45","doi-asserted-by":"crossref","unstructured":"Wang H, Kl\u00e4ser A, Schmid C, Liu CL (2011) Action recognition by dense trajectories. In: Proceedings of CVPR, pp 3169\u20133176. Colorado Springs, USA","DOI":"10.1109\/CVPR.2011.5995407"},{"issue":"10","key":"90_CR46","doi-asserted-by":"crossref","first-page":"1927","DOI":"10.1109\/TPAMI.2011.273","volume":"34","author":"MF Weng","year":"2012","unstructured":"Weng MF, Chuang YY (2012) Cross-domain multicue fusion for concept-based video indexing. IEEE Trans Pattern Anal Mach Intell 34(10):1927\u20131941","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"90_CR47","unstructured":"Yanagawa A, Chang SF, Kennedy L, Hsu W (2007) Columbia University\u2019s baseline detectors for 374 LSCOM semantic visual concepts. Technical report, Columbia University"},{"issue":"4","key":"90_CR48","doi-asserted-by":"crossref","first-page":"723","DOI":"10.1109\/TPAMI.2011.170","volume":"34","author":"Y Yang","year":"2012","unstructured":"Yang Y, Nie F, Xu D, Luo J, Zhuang Y, Pan Y (2012) A multimedia retrieval framework based on semi-supervised ranking and relevance feedback. IEEE Trans Pattern Anal Mach Intell 34(4):723\u2013742","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"90_CR49","unstructured":"Yang Y, Teo CL, Daum\u00e9 III H, Aloimonos Y (2011) Corpus-guided sentence generation of natural images. In: Proceedings of EMNLP, pp 444\u2013454"},{"issue":"2","key":"90_CR50","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1007\/s11263-006-9794-4","volume":"73","author":"J Zhang","year":"2007","unstructured":"Zhang J, Marsza\u0142ek M, Lazebnik S, Schmid C (2007) Local features and kernels for classification of texture and object categories: a comprehensive study. Int J Comput Vis 73(2):213\u2013238","journal-title":"Int J Comput Vis"}],"container-title":["International Journal of Multimedia Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-015-0090-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s13735-015-0090-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-015-0090-3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,16]],"date-time":"2023-08-16T00:44:24Z","timestamp":1692146664000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s13735-015-0090-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,11,30]]},"references-count":50,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2016,6]]}},"alternative-id":["90"],"URL":"https:\/\/doi.org\/10.1007\/s13735-015-0090-3","relation":{},"ISSN":["2192-6611","2192-662X"],"issn-type":[{"value":"2192-6611","type":"print"},{"value":"2192-662X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,11,30]]}}}