{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T07:30:09Z","timestamp":1775719809652,"version":"3.50.1"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2016,6,10]],"date-time":"2016-06-10T00:00:00Z","timestamp":1465516800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2017,5]]},"DOI":"10.1007\/s11042-016-3618-5","type":"journal-article","created":{"date-parts":[[2016,6,10]],"date-time":"2016-06-10T08:25:22Z","timestamp":1465547122000},"page":"11809-11837","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":27,"title":["A comprehensive study on mid-level representation and ensemble learning for emotional analysis of video material"],"prefix":"10.1007","volume":"76","author":[{"given":"Esra","family":"Acar","sequence":"first","affiliation":[]},{"given":"Frank","family":"Hopfgartner","sequence":"additional","affiliation":[]},{"given":"Sahin","family":"Albayrak","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,6,10]]},"reference":[{"key":"3618_CR1","unstructured":"Acar E, Hopfgartner F, Albayrak S (2014) Understanding affective content of music videos through learned representations International conference on multimedia modelling (MMM), pp. 303\u2013314"},{"key":"3618_CR2","doi-asserted-by":"publisher","unstructured":"Acar E, Hopfgartner F, Albayrak S (2015) Fusion of learned multi-modal representations and dense trajectories for emotional analysis in videos. In: IEEE international workshop on content-based multimedia indexing (CBMI), pp. 
1\u20136","DOI":"10.1109\/CBMI.2015.7153603"},{"key":"3618_CR3","doi-asserted-by":"publisher","unstructured":"Baveye Y, Bettinelli J, Dellandr\u00e9a E, Chen L, Chamaret C (2013) A large video database for computational models of induced emotion. In: Humaine association conference on affective computing and intelligent interaction (ACII), pp. 13\u201318","DOI":"10.1109\/ACII.2013.9"},{"key":"3618_CR4","unstructured":"Baveye Y, Dellandr\u00e9a E, Chamaret C, Chen L (2015) Deep learning vs. kernel methods: Performance for emotion prediction in videos. In: International conference on affective computing and intelligent interaction (ACII), pp. 77\u201383"},{"issue":"1","key":"3618_CR5","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1109\/TAFFC.2015.2396531","volume":"6","author":"Y Baveye","year":"2015","unstructured":"Baveye Y, Dellandr\u00e9a E, Chamaret C, Chen L (2015) LIRIS-ACCEDE: A video database for affective content analysis. IEEE Trans. Affect. Comput 6(1):43\u201355","journal-title":"IEEE Trans. Affect. Comput"},{"issue":"8","key":"3618_CR6","doi-asserted-by":"publisher","first-page":"1798","DOI":"10.1109\/TPAMI.2013.50","volume":"35","author":"Y Bengio","year":"2013","unstructured":"Bengio Y, Courville A, Vincent P (2013) Representation learning: A review and new perspectives. IEEE Trans. Pattern Anal. Mach. Intell 35(8):1798\u20131828","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell"},{"key":"3618_CR7","doi-asserted-by":"publisher","unstructured":"Borth D, Chen T, Ji R, Chang S (2013) Sentibank: large-scale ontology and classifiers for detecting sentiment and emotions in visual content. In: ACM international conference on multimedia (ACMMM), pp. 
459\u2013460","DOI":"10.1145\/2502081.2502268"},{"issue":"4","key":"3618_CR8","doi-asserted-by":"publisher","first-page":"636","DOI":"10.1109\/TCSVT.2012.2211935","volume":"23","author":"L Canini","year":"2013","unstructured":"Canini L, Benini S, Leonardi R (2013) Affective recommendation of movies based on selected connotative features. IEEE Trans. Circuits Syst. Video Technol 23 (4):636\u2013647","journal-title":"IEEE Trans. Circuits Syst. Video Technol"},{"issue":"3","key":"3618_CR9","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1961189.1961199","volume":"2","author":"C Chang","year":"2011","unstructured":"Chang C, Lin C (2011) LIBSVM: a library for support vector machines. ACM Trans. Intell. Syst. Technol 2(3):1\u201327","journal-title":"ACM Trans. Intell. Syst. Technol"},{"key":"3618_CR10","first-page":"8586","volume":"abs\/1410","author":"T Chen","year":"2014","unstructured":"Chen T, Borth D, Darrell T, Chang S (2014) Deepsentibank: Visual sentiment concept classification with deep convolutional neural networks. Commun. Res. Rep abs\/1410:8586","journal-title":"Commun. Res. Rep"},{"key":"3618_CR11","doi-asserted-by":"publisher","unstructured":"Chen T, Yu F X, Chen J, Cui Y, Chen Y, Chang S (2014) Object-based visual sentiment concept analysis and application. In: ACM international conference on multimedia (ACMMM), pp. 367\u2013 376","DOI":"10.1145\/2647868.2654935"},{"key":"3618_CR12","doi-asserted-by":"crossref","unstructured":"Dumoulin J, Affi D, Mugellini E, Khaled O A, Bertini M, Bimbo A D (2015) Affect recognition in a realistic movie dataset using a hierarchical approach. In: First international workshop on affect & sentiment in multimedia (ASM), pp. 
15\u201320","DOI":"10.1145\/2813524.2813526"},{"issue":"2","key":"3618_CR13","doi-asserted-by":"publisher","first-page":"407","DOI":"10.1214\/009053604000000067","volume":"32","author":"B Efron","year":"2004","unstructured":"Efron B, Hastie T, Johnstone I, Tibshirani R (2004) Least angle regression. Ann. Stat 32(2):407\u2013 499","journal-title":"Ann. Stat"},{"key":"3618_CR14","doi-asserted-by":"publisher","unstructured":"Eggink J, Bland D (2012) A large scale experiment for mood-based classification of tv programmes. In: IEEE international conference on multimedia and expo (ICME), pp. 140\u2013145","DOI":"10.1109\/ICME.2012.68"},{"key":"3618_CR15","doi-asserted-by":"publisher","unstructured":"Ellis J G, Lin W S, Lin C, Chang S (2014) Predicting evoked emotions in video. In: IEEE international symposium on multimedia (ISM), pp. 287\u2013294","DOI":"10.1109\/ISM.2014.69"},{"key":"3618_CR16","first-page":"975","volume":"5","author":"T Fan Wu","year":"2003","unstructured":"Fan Wu T, Lin C J, Weng R C (2003) Probability estimates for multi-class classification by pairwise coupling. J. Mach. Learn. Res 5:975\u20131005","journal-title":"J. Mach. Learn. Res"},{"issue":"2","key":"3618_CR17","doi-asserted-by":"publisher","first-page":"120","DOI":"10.1016\/j.imavis.2012.06.016","volume":"31","author":"H Gunes","year":"2013","unstructured":"Gunes H, Schuller B (2013) Categorical and dimensional affect analysis in continuous input: current trends and future directions. Image Vis. Comput 31(2):120\u2013136","journal-title":"Image Vis. Comput"},{"key":"3618_CR18","doi-asserted-by":"publisher","unstructured":"Irie G, Hidaka K, Satou T, Yamasaki T, Aizawa K (2009) Affective video segment retrieval for consumer generated videos based on correlation between emotions and emotional audio events. In: IEEE international conference on multimedia and expo (ICME), pp. 
522\u2013525","DOI":"10.1109\/ICME.2009.5202548"},{"issue":"6","key":"3618_CR19","doi-asserted-by":"publisher","first-page":"523","DOI":"10.1109\/TMM.2010.2051871","volume":"12","author":"G Irie","year":"2010","unstructured":"Irie G, Satou T, Kojima A, Yamasaki T, Aizawa K (2010) Affective audio-visual words and latent topic driving model for realizing movie affective scene classification. IEEE Trans. Multimedia 12(6):523\u2013535","journal-title":"IEEE Trans. Multimedia"},{"issue":"6","key":"3618_CR20","doi-asserted-by":"publisher","first-page":"720","DOI":"10.1109\/76.927428","volume":"11","author":"S Jeannin","year":"2001","unstructured":"Jeannin S, Divakaran A (2001) Mpeg-7 visual motion descriptors. IEEE Trans. Circuits Syst. Video Technol 11(6):720\u2013724","journal-title":"IEEE Trans. Circuits Syst. Video Technol"},{"issue":"1","key":"3618_CR21","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2013","unstructured":"Ji S, Xu W, Yang M, Yu K (2013) 3d convolutional neural networks for human action recognition. IEEE Trans. Pattern Anal. Mach. Intell 35(1):221\u2013231","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell"},{"key":"3618_CR22","doi-asserted-by":"publisher","unstructured":"Jia Y, Shelhamer E, Donahue J, Karayev S, Long J, Girshick R, Guadarrama S, Darrell T (2014) Caffe: Convolutional architecture for fast feature embedding. In: ACM international conference on multimedia (ACMMM), pp. 675\u2013678","DOI":"10.1145\/2647868.2654889"},{"key":"3618_CR23","doi-asserted-by":"crossref","unstructured":"Jiang Y, Xu B, Xue X (2014) Predicting emotions in user-generated videos. 
In: The AAAI conference on artificial intelligence (AAAI)","DOI":"10.1609\/aaai.v28i1.8724"},{"issue":"1","key":"3618_CR24","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1109\/T-AFFC.2011.15","volume":"3","author":"S Koelstra","year":"2012","unstructured":"Koelstra S, M\u00fchl C, Soleymani M, Lee J, Yazdani A, Ebrahimi T, Pun T, Nijholt A, Patras I (2012) Deap: A database for emotion analysis; using physiological signals. IEEE Trans. Affect. Comput 3(1):18\u201331","journal-title":"IEEE Trans. Affect. Comput"},{"key":"3618_CR25","unstructured":"Krizhevsky A, Sutskever I, Hinton G E (2012) Imagenet classification with deep convolutional neural networks. In: Advances in neural information processing systems (NIPS), pp. 1097\u20131105"},{"key":"3618_CR26","unstructured":"Li T L, Chan A B, Chun A H (2010) Automatic musical pattern feature extraction using convolutional neural network. In: International multiconference of engineers and computer scientists (IMECS)"},{"key":"3618_CR27","first-page":"19","volume":"11","author":"J Mairal","year":"2010","unstructured":"Mairal J, Bach F, Ponce J, Sapiro G (2010) Online learning for matrix factorization and sparse coding. J. Mach. Learn. Res 11:19\u201360","journal-title":"J. Mach. Learn. Res"},{"key":"3618_CR28","doi-asserted-by":"crossref","unstructured":"Niu J, Zhao X, Abdul Aziz M A (2015) A novel affect-based model of similarity measure of videos. Neurocomputing (in press)","DOI":"10.1016\/j.neucom.2015.01.104"},{"key":"3618_CR29","doi-asserted-by":"publisher","unstructured":"Pang L, Ngo C W (2015) Multimodal learning with deep boltzmann machine for emotion prediction in user generated videos. In: ACM international conference on multimedia retrieval (ICMR), pp. 619\u2013622","DOI":"10.1145\/2671188.2749400"},{"key":"3618_CR30","unstructured":"Plutchik R, Kellerman H (1986) Emotion: theory research and experience, vol 3. 
Academic press, New York"},{"key":"3618_CR31","doi-asserted-by":"publisher","unstructured":"Safadi B, Qu\u00e9not G (2015) A factorized model for multiple SVM and multi-label classification for large scale multimedia indexing. In: 13th international workshop on content-based multimedia indexing, CBMI 2015, Prague, Czech Republic, June 10-12, 2015, pp. 1\u20136","DOI":"10.1109\/CBMI.2015.7153610"},{"key":"3618_CR32","unstructured":"Schmidt E, Scott J, Kim Y (2012) Feature learning in dynamic environments: Modeling the acoustic structure of musical emotion. In: International society for music information retrieval conference (ISMIR), pp. 325\u2013330"},{"key":"3618_CR33","doi-asserted-by":"publisher","unstructured":"Soleymani M, Aljanaki A, Wiering F, Veltkamp R C (2015) Content-based music recommendation using underlying music preference structure. In: 2015 IEEE international conference on multimedia and expo (ICME), pp. 1\u20136","DOI":"10.1109\/ICME.2015.7177504"},{"key":"3618_CR34","unstructured":"Sturm B L, Noorzad P (2012) On automatic music genre recognition by sparse representation classification using auditory temporal modulations. In: International symposium on computer music modeling and retrieval, pp. 379\u2013394"},{"issue":"4","key":"3618_CR35","doi-asserted-by":"publisher","first-page":"394","DOI":"10.1037\/0096-3445.123.4.394","volume":"123","author":"P Valdez","year":"1994","unstructured":"Valdez P, Mehrabian A (1994) Effects of color on emotions. J. Exp. Psychol. Gen 123(4):394\u2013 409","journal-title":"J. Exp. Psychol. Gen"},{"key":"3618_CR36","doi-asserted-by":"publisher","unstructured":"Wang H, Schmid C (2013) Action recognition with improved trajectories. In: Proc. IEEE international conference on computer vision (ICCV), pp. 
3551\u20133558","DOI":"10.1109\/ICCV.2013.441"},{"issue":"6","key":"3618_CR37","doi-asserted-by":"publisher","first-page":"689","DOI":"10.1109\/TCSVT.2006.873781","volume":"16","author":"HL Wang","year":"2006","unstructured":"Wang H L, Cheong L (2006) Affective understanding in film. IEEE Trans. Circuits Syst. Video Technol 16(6):689\u2013704","journal-title":"IEEE Trans. Circuits Syst. Video Technol"},{"issue":"4","key":"3618_CR38","doi-asserted-by":"publisher","first-page":"410","DOI":"10.1109\/TAFFC.2015.2432791","volume":"6","author":"S Wang","year":"2015","unstructured":"Wang S, Ji Q (2015) Video affective content analysis: A survey of state-of-the-art methods. IEEE Trans. Affect. Comput 6(4):410\u2013430","journal-title":"IEEE Trans. Affect. Comput"},{"key":"3618_CR39","unstructured":"Wimmer M, Schuller B, Arsic D, Rigoll G, Radig B (2008) Low-level fusion of audio and video feature for multi-modal emotion recognition. In: International joint conference on computer vision, imaging and computer graphics theory and applications, pp. 145\u2013151"},{"key":"3618_CR40","first-page":"04798","volume":"abs\/1511","author":"B Xu","year":"2015","unstructured":"Xu B, Fu Y, Jiang Y, Li B, Sigal L (2015) Heterogeneous knowledge transfer in video emotion recognition, attribution and summarization. Commun. Res. Rep abs\/1511:04798","journal-title":"Commun. Res. Rep"},{"key":"3618_CR41","first-page":"5731","volume":"abs\/1411","author":"C Xu","year":"2014","unstructured":"Xu C, Cetintas S, Lee K, Li L (2014) Visual sentiment prediction with deep convolutional neural networks. Commun. Res. Rep abs\/1411:5731","journal-title":"Commun. Res. Rep"},{"issue":"2","key":"3618_CR42","doi-asserted-by":"publisher","first-page":"757","DOI":"10.1007\/s11042-012-1046-8","volume":"70","author":"M Xu","year":"2014","unstructured":"Xu M, Wang J, He X, Jin J S, Luo S, Lu H (2014) A three-level framework for affective content analysis and its case studies. 
Multimedia Tools and Applications 70(2):757\u2013779","journal-title":"Multimedia Tools and Applications"},{"issue":"2","key":"3618_CR43","doi-asserted-by":"publisher","first-page":"824","DOI":"10.1109\/18.119739","volume":"38","author":"X Yang","year":"1992","unstructured":"Yang X, Wang K, Shamma S A (1992) Auditory representations of acoustic signals. IEEE Trans. Inf. Theory 38(2):824\u2013839","journal-title":"IEEE Trans. Inf. Theory"},{"key":"3618_CR44","doi-asserted-by":"publisher","unstructured":"Yazdani A, Kappeler K, Ebrahimi T (2011) Affective content analysis of music video clips. In: ACM international workshop on music information retrieval with user-centered and multimodal strategies (MIRUM), pp. 7\u201312","DOI":"10.1145\/2072529.2072532"},{"key":"3618_CR45","doi-asserted-by":"publisher","unstructured":"Yucel Z, Salah A A (2009) Resolution of focus of attention using gaze direction estimation and saliency computation. In: International conference on affective computing and intelligent interaction (ACII), pp. 
1\u20136","DOI":"10.1109\/ACII.2009.5349547"},{"key":"3618_CR46","doi-asserted-by":"crossref","unstructured":"Zhou Z (2012) Ensemble methods: foundations and algorithms CRC Press","DOI":"10.1201\/b12207"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11042-016-3618-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-016-3618-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-016-3618-5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-016-3618-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,1]],"date-time":"2022-07-01T11:43:56Z","timestamp":1656675836000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11042-016-3618-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,6,10]]},"references-count":46,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2017,5]]}},"alternative-id":["3618"],"URL":"https:\/\/doi.org\/10.1007\/s11042-016-3618-5","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,6,10]]}}}