{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T14:19:03Z","timestamp":1773065943020,"version":"3.50.1"},"publisher-location":"Cham","reference-count":40,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319056951","type":"print"},{"value":"9783319056968","type":"electronic"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-05696-8_8","type":"book-chapter","created":{"date-parts":[[2014,5,9]],"date-time":"2014-05-09T14:19:39Z","timestamp":1399645179000},"page":"185-208","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Multimodal Violence Detection in Hollywood Movies: State-of-the-Art and Benchmarking"],"prefix":"10.1007","author":[{"given":"Claire-H\u00e9l\u00e8ne","family":"Demarty","sequence":"first","affiliation":[]},{"given":"C\u00e9dric","family":"Penet","sequence":"additional","affiliation":[]},{"given":"Bogdan","family":"Ionescu","sequence":"additional","affiliation":[]},{"given":"Guillaume","family":"Gravier","sequence":"additional","affiliation":[]},{"given":"Mohammad","family":"Soleymani","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,3,26]]},"reference":[{"key":"8_CR1","unstructured":"Acar E, Albayrak S (2012) Dai lab at mediaeval 2012 affect task: the detection of violent scenes using affective features. In: MediaEval 2012, multimedia benchmark workshop"},{"key":"8_CR2","unstructured":"Acar E, Spiegel S, Albayrak S (2011) Mediaeval 2011 affect task: Violent scene detection combining audio and visual features with svm. In: MediaEval 2011, multimedia benchmark workshop"},{"key":"8_CR3","doi-asserted-by":"crossref","unstructured":"Baveye Y, Urban F, Chamaret C, Demoulin V, Hellier P (2013) Saliency-guided consistent color harmonization. Computational color imaging, Lecture notes in computer science, vol 7786. Springer, Berlin, pp 105\u2013118","DOI":"10.1007\/978-3-642-36700-7_9"},{"key":"8_CR4","doi-asserted-by":"crossref","unstructured":"Chen LH, Hsu HW, Wang LY, Su CW (2011) Violence detection in movies. In: 8th IEEE international conference on computer graphics, imaging and visualization (CGIV 2011), pp 119\u2013124","DOI":"10.1109\/CGIV.2011.14"},{"key":"8_CR5","doi-asserted-by":"publisher","first-page":"248","DOI":"10.4304\/jmm.4.4.248-253","volume":"4","author":"LH Chen","year":"2009","unstructured":"Chen LH, Su CW, Weng CF, Liao HYM (2009) Action Scene Detection With Support Vector Machines. J Multimedia 4:248\u2013253. doi:10.4304\/jmm.4.4.248-253","journal-title":"J Multimedia"},{"key":"8_CR6","doi-asserted-by":"crossref","unstructured":"Cheng WH, Chu WT, Wu JL (2003) Semantic context detection based on hierarchical audio models. In: Proceedings of the 5th ACM SIGMM international workshop on multimedia information retrieval, pp 109\u2013115","DOI":"10.1145\/973264.973282"},{"key":"8_CR7","doi-asserted-by":"crossref","unstructured":"Cooper GF, Herskovits E (1992) A Bayesian method for the induction of probabilistic networks from data. Mach Learn 9:309\u2013347. http:\/\/dx.doi.org\/10.1007\/BF00994110","DOI":"10.1007\/BF00994110"},{"key":"8_CR8","doi-asserted-by":"crossref","unstructured":"Datta A, Shah M, Da Vitoria Lobo N (2002) Person-on-person violence detection in video data. In: Proceedings of 16th IEEE international conference on pattern recognition, vol 1. pp 433\u2013438","DOI":"10.1109\/ICPR.2002.1044748"},{"key":"8_CR9","doi-asserted-by":"crossref","unstructured":"Demarty CH, Penet C, Gravier G, Soleymani M (2012) A benchmarking campaign for the multimodal detection of violent scenes in movies. In: Computer Vision-ECCV 2012. Workshops and demonstrations, Springer, pp 416\u2013425","DOI":"10.1007\/978-3-642-33885-4_42"},{"key":"8_CR10","unstructured":"Derbas N, Thollard F, Safadi B, Qu\u00e9not G (2012) Lig at mediaeval 2012 affect task: use of a generic method. In: MediaEval 2012, multimedia benchmark workshop"},{"key":"8_CR11","doi-asserted-by":"crossref","unstructured":"de Souza FDM, Ch\u00e1vez GC, do Valle E, de A Araujo A (2010) Violence detection in video using spatio-temporal features. In: 23rd IEEE conference on graphics, patterns and images (SIBGRAPI 2010), pp 224\u2013230","DOI":"10.1109\/SIBGRAPI.2010.38"},{"issue":"7","key":"8_CR12","doi-asserted-by":"publisher","first-page":"1512","DOI":"10.1109\/TIP.2009.2019809","volume":"18","author":"JV de Weijer","year":"2009","unstructured":"de Weijer JV, Schmid C, Verbeek J, Larlus D (2009) Learning color names for real-world applications. IEEE Trans Image Process 18(7):1512\u20131523","journal-title":"IEEE Trans Image Process"},{"key":"8_CR13","unstructured":"Eyben F, Weninger F, Lehment N, Rigoll G, Schuller B (2012) Violent scenes detection with large, brute-forced acoustic and visual feature sets. In: MediaEval 2012, multimedia benchmark workshop"},{"key":"8_CR14","doi-asserted-by":"crossref","unstructured":"Giannakopoulos T, Makris A, Kosmopoulos D, Perantonis S, Theodoridis S (2010) Audio-visual fusion for detecting violent scenes in videos. In: Konstantopoulos S et al (eds) Artificial intelligence: theories, models and applications, Lecture notes in computer scienc, vol 6040. Springer, pp 91\u2013100","DOI":"10.1007\/978-3-642-12842-4_13"},{"key":"8_CR15","unstructured":"Glotin H, Razik J, Paris S, Prevot JM (2011) Real-time entropic unsupervised violent scenes detection in hollywood movies - dyni @ mediaeval affect task 2011. In: MediaEval 2011, multimedia benchmark workshop"},{"key":"8_CR16","unstructured":"Gninkoun G, Soleymani M (2011) Automatic violence scenes detection: a multi-modal approach. In: MediaEval 2011, multimedia benchmark workshop"},{"key":"8_CR17","doi-asserted-by":"crossref","unstructured":"Gong Y, Wang W, Jiang S, Huang Q, Gao W (2008) Detecting violent scenes in movies by auditory and visual cues. In: Huang YM et al (eds) Advances in multimedia information processing - (PCM 2008), Lecture notes in computer science, vol 5353. Springer, pp 317\u2013326","DOI":"10.1007\/978-3-540-89796-5_33"},{"key":"8_CR18","doi-asserted-by":"publisher","unstructured":"Gravier G, Demarty CH, Baghdadi S, Gros P (2012) Classification-oriented structure learning in bayesian networks for multimodal event detection in videos. Multimedia tools and applications, pp 1\u201317. doi: 10.1007\/s11042-012-1169-y, http:\/\/dx.doi.org\/10.1007\/s11042-012-1169-y","DOI":"10.1007\/s11042-012-1169-y"},{"key":"8_CR19","unstructured":"Hinton G, Srivastava N, Krizhevsky A, Sutskever I, Salakhutdinov R (2012) Improving neural networks by preventing co-adaptation of feature detectors. http:\/\/arxiv.org\/abs\/1207.0580"},{"key":"8_CR20","unstructured":"Ionescu B, Buzuloiu V, Lambert P, Coquin D (2006) Improved cut detection for the segmentation of animation movies. In: IEEE international conference on acoustics, speech, and signal processing"},{"key":"8_CR21","doi-asserted-by":"crossref","unstructured":"Ionescu B, Schl\u00fcter J, Mironic\u0103 I, Schedl M (2013) A naive mid-level concept-based fusion approach to violence detection in hollywood movies. In: Proceedings of the 3rd ACM international conference on multimedia retrieval, pp 215\u2013222","DOI":"10.1145\/2461466.2461502"},{"key":"8_CR22","unstructured":"Jiang YG, Dai Q, Tan CC, Xue X, Ngo CW (2012) The shanghai-hongkong team at mediaeval2012: Violent scene detection using trajectory-based features. In: MediaEval 2012, multimedia benchmark workshop"},{"key":"8_CR23","unstructured":"Kriegel B (2003) La violence \u00e0 la t\u00e9l\u00e9vision. rapport de la mission d\u2019\u00e9valuation, d\u2019analyse et de propositions relative aux repr\u00e9sentations violentes \u00e0 la t\u00e9l\u00e9vision. Technical report, Minist\u00e8re de la Culture et de la Communication, Paris"},{"key":"8_CR24","doi-asserted-by":"crossref","unstructured":"Krug EG, Mercy JA, Dahlberg LL, Zwi AB (2002) The world report on violence and health. The Lancet 360(9339):1083\u20131088 (2002). doi: 10.1016\/S0140-6736(02)11133-0. http:\/\/www.sciencedirect.com\/science\/article\/pii\/S0140673602111330","DOI":"10.1016\/S0140-6736(02)11133-0"},{"key":"8_CR25","unstructured":"Lam V, Le DD, Le SP, Satoh S, Duong DA (2012) Nii, Japan at mediaeval 2012 violent scenes detection affect task. In: MediaEval 2011, multimedia benchmark workshop"},{"key":"8_CR26","unstructured":"Lam V, Le DD, Satoh S, Duong, DA (2011) Nii, Japan at mediaeval 2011 violent scenes detection task. In: MediaEval 2011, multimedia benchmark workshop"},{"key":"8_CR27","doi-asserted-by":"crossref","unstructured":"Lin J, Wang W (2009) Weakly-supervised violence detection in movies with audio and video based co-training. In: Advances in multimedia information processing-PCM 2009, Springer, pp 930\u2013935","DOI":"10.1007\/978-3-642-10467-1_84"},{"key":"8_CR28","doi-asserted-by":"crossref","unstructured":"Lucas P (2002) Restricted Bayesian network structure learning. In: Advances in Bayesian networks, studies in fuzziness and soft computing, pp 217\u2013232","DOI":"10.1007\/978-3-540-39879-0_12"},{"key":"8_CR29","unstructured":"Ludwig O, Delgado D, Goncalves V, Nunes U (2009) Trainable classifier-fusion schemes: An application to pedestrian detection. In: IEEE internation conference on intelligent transportation systems, pp 432\u2013437"},{"key":"8_CR30","unstructured":"Martin V, Glotin H, Paris S, Halkias X, Prevot JM (2012) Violence detection in video by large scale multi-scale local binary pattern dynamics. In: MediaEval 2012, multimedia benchmark workshop"},{"key":"8_CR31","doi-asserted-by":"crossref","unstructured":"Nam J, Alghoniemy M, Tewfik AH (1998) Audio-visual content-based violent scene characterization. In: Proceedings of IEEE international conference on image processing (ICIP-98), vol 1. pp 353\u2013357","DOI":"10.1109\/ICIP.1998.723496"},{"key":"8_CR32","doi-asserted-by":"crossref","unstructured":"Nievas EB, Suarez OD, Garc\u00eda GB, Sukthankar R (2011) Violence detection in video using computer vision techniques. In: Computer analysis of images and patterns, Springer, pp 332\u2013339","DOI":"10.1007\/978-3-642-23678-5_39"},{"key":"8_CR33","unstructured":"Penet C, Demarty CH, Gravier G, Gros P (2011) Technicolor and inria\/irisa at mediaeval 2011: learning temporal modality integration with bayesian networks. In: MediaEval 2011, Multimedia Benchmark Workshop, CEUR Workshop Proceedings, vol 807. http:\/\/CEUR-WS.org"},{"key":"8_CR34","doi-asserted-by":"crossref","unstructured":"Penet C, Demarty CH, Gravier G, Gros P (2013) Audio event detection in movies using multiple audio words and contextual Bayesian networks. In: Workshop on content-based multimedia indexing","DOI":"10.1109\/CBMI.2013.6576546"},{"key":"8_CR35","unstructured":"Penet C, Demarty CH, Soleymani M, Gravier G, Gros P (2012) Technicolor\/inria\/imperial college london at the mediaeval 2012 violent scene detection task. In: MediaEval 2012, multimedia benchmark workshop"},{"key":"8_CR36","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1038\/323533a0","volume":"323","author":"DE Rumelhart","year":"1986","unstructured":"Rumelhart DE, Hinton GE, Williams RJ (1986) Learning representations by back-propagating errors. Nature 323:533\u2013536","journal-title":"Nature"},{"key":"8_CR37","unstructured":"Safadi B, Qu\u00e9enot G (2011) Lig at mediaeval 2011 affect task: use of a generic method. In: MediaEval 2011, multimedia benchmark, workshop"},{"key":"8_CR38","doi-asserted-by":"crossref","unstructured":"Schl\u00fcter J, Ionescu B, Mironic\u0103 I, Schedl M (2012) Arf @ mediaeval 2012: an uninformed approach to violence detection in hollywood movies. In: MediaEval 2012, multimedia benchmark, workshop","DOI":"10.1145\/2461466.2461502"},{"key":"8_CR39","unstructured":"Violence (1996) A public health priority. Technical Report, World Health Organization, Geneva, WHO\/EHA\/SPI.POA.2"},{"key":"8_CR40","doi-asserted-by":"crossref","unstructured":"Zajdel W, Krijnders JD, Andringa T, Gavrila DM (2007) Cassandra: audio-video sensor fusion for aggression detection. In: IEEE conference on advanced video and signal based surveillance (AVSS 2007), pp 200\u2013205","DOI":"10.1109\/AVSS.2007.4425310"}],"container-title":["Advances in Computer Vision and Pattern Recognition","Fusion in Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-05696-8_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,20]],"date-time":"2023-02-20T02:07:58Z","timestamp":1676858878000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-05696-8_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319056951","9783319056968"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-05696-8_8","relation":{},"ISSN":["2191-6586","2191-6594"],"issn-type":[{"value":"2191-6586","type":"print"},{"value":"2191-6594","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"26 March 2014","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}