{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,28]],"date-time":"2025-10-28T03:11:35Z","timestamp":1761621095393,"version":"3.40.2"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2012,2,26]],"date-time":"2012-02-26T00:00:00Z","timestamp":1330214400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2012,6]]},"DOI":"10.1007\/s00530-012-0257-1","type":"journal-article","created":{"date-parts":[[2012,2,25]],"date-time":"2012-02-25T02:17:08Z","timestamp":1330136228000},"page":"231-250","source":"Crossref","is-referenced-by-count":4,"title":["Semi-supervised context adaptation: case study of audience excitement recognition"],"prefix":"10.1007","volume":"18","author":[{"given":"Elena","family":"Vildjiounaite","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vesa","family":"Kyll\u00f6nen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Satu-Marja","family":"M\u00e4kel\u00e4","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Olli","family":"Vuorinen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tommi","family":"Ker\u00e4nen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Johannes","family":"Peltola","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Georgy","family":"Gimel\u2019farb","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2012,2,26]]},"reference":[{"key":"257_CR1","doi-asserted-by":"crossref","first-page":"345","DOI":"10.1007\/s00530-010-0182-0","volume":"16","author":"PK Atrey","year":"2010","unstructured":"Atrey, P.K., Hossain, M.A., El Saddik, A., Kankanhalli, M.S.: Multimodal fusion for multimedia analysis: a survey. Multime Sys 16, 345\u2013379 (2010)","journal-title":"Multime Sys"},{"key":"257_CR2","volume-title":"Pattern recognition and machine learning","author":"Ch Bishop","year":"2006","unstructured":"Bishop, Ch.: Pattern recognition and machine learning. Springer, Berlin (2006)"},{"issue":"3","key":"257_CR3","doi-asserted-by":"crossref","first-page":"416","DOI":"10.1109\/TSMCC.2008.919173","volume":"38","author":"D Brezeale","year":"2008","unstructured":"Brezeale, D., Cook, D.J.: Automatic video classification: a survey of the literature. IEEE Trans Sys Man Cybern Part C Appl Rev 38(3), 416\u2013430 (2008)","journal-title":"IEEE Trans Sys Man Cybern Part C Appl Rev"},{"key":"257_CR4","doi-asserted-by":"crossref","unstructured":"Butko, T., Pla, F., Segura, C., Nadeu, C., Hernando, J.: Two-source acoustic event detection and localisation: online implementation in a smart-room. In: Proceedings of EUSIPCO, pp. 1317\u20131321 (2011)","DOI":"10.1155\/2011\/485738"},{"issue":"3","key":"257_CR5","doi-asserted-by":"crossref","first-page":"1026","DOI":"10.1109\/TSA.2005.857575","volume":"14","author":"R Cai","year":"2006","unstructured":"Cai, R., Hanjalic, A.: A flexible framework for key audio effects detection and auditory context inference. IEEE Trans Audio Speech Lang Process 14(3), 1026\u20131039 (2006)","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"1","key":"257_CR6","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1109\/T-AFFC.2010.1","volume":"1","author":"R Calvo","year":"2010","unstructured":"Calvo, R., D\u2019Mello, S.: Affect detection: an interdisciplinary review of models, methods, and their applications. IEEE Trans Affect Comput 1(1), 18\u201337 (2010)","journal-title":"IEEE Trans Affect Comput"},{"issue":"13\u201315","key":"257_CR7","doi-asserted-by":"crossref","first-page":"2553","DOI":"10.1016\/j.neucom.2007.11.043","volume":"71","author":"G Caridakis","year":"2008","unstructured":"Caridakis, G., Karpouzis, K., Kollias, S.: User and context adaptive neural networks for emotion recognition. Neurocomputing 71(13\u201315), 2553\u20132562 (2008)","journal-title":"Neurocomputing"},{"key":"257_CR8","unstructured":"Carnegie Mellon University image database: http:\/\/vasc.ri.cmu.edu\/idb\/index.html"},{"issue":"12","key":"257_CR9","doi-asserted-by":"crossref","first-page":"1553","DOI":"10.1109\/TPAMI.2004.127","volume":"26","author":"I Cohen","year":"2004","unstructured":"Cohen, I., Cozman, F.G., Sebe, N., Cirelo, M.C., Huang, T.S.: Semisupervised learning of classifiers: theory, algorithms, and their application to human-computer interaction. IEEE Trans Pattern Anal Mach Intell 26(12), 1553\u20131567 (2004)","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"257_CR10","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1007\/s007790170019","volume":"5","author":"A Dey","year":"2001","unstructured":"Dey, A.: Understanding and using context. Pers Ubiquitous Comput 5, 4\u20137 (2001)","journal-title":"Pers Ubiquitous Comput"},{"key":"257_CR11","doi-asserted-by":"crossref","unstructured":"Douglas-Cowie, E., et al.: The HUMAINE database: addressing the collection and annotation of naturalistic and induced emotional data. In: Proceedings of the ACII 2007, LNCS 4738, pp. 488\u2013500 (2007)","DOI":"10.1007\/978-3-540-74889-2_43"},{"key":"257_CR12","doi-asserted-by":"crossref","unstructured":"Ellis, D.P.W., Xiaohong Zeng, McDermott, J.H.: Classifying soundtracks with audio texture features. In: Proceedings of the ICASSP 2011, pp. 5880\u20135883 (2011)","DOI":"10.1109\/ICASSP.2011.5947699"},{"key":"257_CR13","unstructured":"Feret: http:\/\/www.itl.nist.gov\/iad\/humanid\/feret\/feret_master.html"},{"key":"257_CR14","unstructured":"Forbes-Riley, K., Litman, D.: Predicting emotion in spoken dialogue from multiple knowledge sources. In: Proceedings of the HLT\/NAACL 2004, pp. 201\u2013208 (2004)"},{"issue":"1","key":"257_CR15","doi-asserted-by":"crossref","first-page":"68","DOI":"10.4018\/jse.2010101605","volume":"1","author":"H Gunes","year":"2010","unstructured":"Gunes, H., Pantic, M.: Automatic, dimensional and continuous emotion recognition. Int J Synth Emot 1(1), 68\u201399 (2010)","journal-title":"Int J Synth Emot"},{"key":"257_CR16","doi-asserted-by":"crossref","first-page":"33","DOI":"10.1007\/s12193-009-0025-5","volume":"3","author":"L Kessous","year":"2010","unstructured":"Kessous, L., Castellano, G., Caridakis, G.: Multimodal emotion recognition in speech-based interaction using facial expression, body gesture and acoustic analysis. J Multimodal User Interfaces 3, 33\u201348 (2010)","journal-title":"J Multimodal User Interfaces"},{"key":"257_CR17","doi-asserted-by":"crossref","unstructured":"Lahti, T., Helen, M., Vuorinen, O., V\u00e4yrynen, E., Partala, J., Peltola, J., M\u00e4kel\u00e4, S.-M.: On enabling techniques for personal audio content management. In: ACM international conference on multimedia information retrieval, pp. 113\u2013120 (2008)","DOI":"10.1145\/1460096.1460116"},{"key":"257_CR18","doi-asserted-by":"crossref","unstructured":"Laskowski, K.: Contrasting emotion-bearing laughter types in multiparticipant vocal activity detection for meetings. In: Proceedings of the ICASSP 2009, pp. 4765\u20134768 (2009)","DOI":"10.1109\/ICASSP.2009.4960696"},{"issue":"1","key":"257_CR19","doi-asserted-by":"crossref","first-page":"93","DOI":"10.1109\/TSMCA.2004.838454","volume":"35","author":"X Li","year":"2005","unstructured":"Li, X., Ji, Q.: Active affective state detection and user assistance with dynamic Bayesian networks. IEEE Trans Sys Man Cybern Part A Sys Hum 35(1), 93\u2013105 (2005)","journal-title":"IEEE Trans Sys Man Cybern Part A Sys Hum"},{"key":"257_CR20","unstructured":"Lu, L.: Content discovery from composite audio: an unsupervised approach, PhD Thesis, Delft University of Technology, http:\/\/homepage.tudelft.nl\/c7c8y\/Theses\/PhDThesisLieLu.pdf (2009)"},{"key":"257_CR21","unstructured":"Lucas, B.D., Kanade, T.: An iterative image registration technique with an application to stereo vision. In: Proceedings of the DARPA imaging understanding workshop, Washington, DC, pp. 121\u2013130, April 1981"},{"issue":"2","key":"257_CR22","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1149290.1149292","volume":"3","author":"L Ma","year":"2006","unstructured":"Ma, L., Milner, B., Smith, D.: Acoustic environment classification. ACM Trans Speech Lang Process 3(2), 1\u201322 (2006)","journal-title":"ACM Trans Speech Lang Process"},{"key":"257_CR23","doi-asserted-by":"crossref","unstructured":"M\u00e4kel\u00e4, S.-M., Peltola, J., Myllyniemi, M.: Mobile video capture targeted narrowband audio content classification. In: Proceedings of the ICASSP 2006, p V-525-8 (2006)","DOI":"10.1109\/ICASSP.2006.1661328"},{"key":"257_CR24","doi-asserted-by":"crossref","unstructured":"Nicolaou, M., Gunes, H., Pantic, M.: Continuous prediction of spontaneous affect from multiple cues and modalities in valence\u2013arousal space. In: IEEE transactions on affective computing, special issue on affect based human behavior understanding (2011)","DOI":"10.1109\/T-AFFC.2011.9"},{"key":"257_CR25","unstructured":"OpenCV: http:\/\/sourceforge.net\/projects\/opencvlibrary\/"},{"key":"257_CR26","doi-asserted-by":"crossref","unstructured":"Otsuka, I., Shipman, S., Divakaran, A.: A video browsing enabled personal video recorder, multimedia content analysis, pp. 1\u201312 (2009)","DOI":"10.1007\/978-0-387-76569-3_14"},{"issue":"2","key":"257_CR27","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1109\/5.18626","volume":"77","author":"LR Rabiner","year":"1986","unstructured":"Rabiner, L.R.: A tutorial on hidden Markov models and selected applications in speech recognition. Proc IEEE 77(2), 257\u2013286 (1986)","journal-title":"Proc IEEE"},{"key":"257_CR28","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1155\/ASP\/2006\/89013","volume":"2006","author":"R Radhakrishnan","year":"2006","unstructured":"Radhakrishnan, R., Divakaran, A., Xiong, Z., Otsuka, I.: A content-adaptive analysis and representation framework for audio event discovery from \u201cunscripted\u201d multimedia. EURASIP J Appl Signal Process 2006, 1\u201324 (2006)","journal-title":"EURASIP J Appl Signal Process"},{"key":"257_CR29","doi-asserted-by":"crossref","unstructured":"Sano, M., Sumiyoshi, H., Shibata, M., Yagi, N.: Generating metadata from acoustic and speech data in live broadcasting. In: Proceedings of the ICASSP 2005, vol. 2, pp. 1145\u20131148 (2005)","DOI":"10.1109\/ICASSP.2005.1415612"},{"key":"257_CR30","doi-asserted-by":"crossref","unstructured":"Schuller, B., Villar, R., Rigoll, G., Lang, M.: Meta-classifiers in acoustic and linguistic feature fusion-based affect recognition. In: Proceedings of the ICASSP05, pp. 325\u2013328 (2005)","DOI":"10.1109\/ICASSP.2005.1415116"},{"key":"257_CR31","doi-asserted-by":"crossref","first-page":"1760","DOI":"10.1016\/j.imavis.2009.02.013","volume":"27","author":"B Schuller","year":"2009","unstructured":"Schuller, B., M\u00fcller, R., Eyben, F., Gast, J., H\u00f6rnler, B., W\u00f6llmer, M., Rigoll, G., H\u00f6thker, A., Konosu, H.: Being bored? Recognising natural interest by extensive audiovisual integration for real-life application. Image Vis Comput 27, 1760\u20131774 (2009)","journal-title":"Image Vis Comput"},{"issue":"2","key":"257_CR32","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1109\/T-AFFC.2010.8","volume":"1","author":"B Schuller","year":"2010","unstructured":"Schuller, B., Vlasenko, B., Eyben, F., Wollmer, M., Stuhlsatz, A., Wendemuth, A., Rigoll, G.: Cross-corpus acoustic emotion recognition: variances and strategies. IEEE Trans Affect Comput 1(2), 119\u2013131 (2010)","journal-title":"IEEE Trans Affect Comput"},{"key":"257_CR33","doi-asserted-by":"crossref","unstructured":"Song, M., Bu, J., Chen, Ch., Li, N.: Audio-visual based emotion recognition: a new approach. In: Proceedings of the CVPR 2004, vol. 2, pp. 1020\u20131025 (2004)","DOI":"10.1109\/CVPR.2004.1315276"},{"issue":"4","key":"257_CR34","doi-asserted-by":"crossref","first-page":"341","DOI":"10.1023\/A:1008202821328","volume":"11","author":"R Storn","year":"1997","unstructured":"Storn, R., Price, K.: Differential evolution\u2014a simple and efficient heuristic for global optimization over continuous spaces. J Glob Optim 11(4), 341\u2013359 (1997)","journal-title":"J Glob Optim"},{"issue":"6","key":"257_CR35","doi-asserted-by":"crossref","first-page":"502","DOI":"10.1109\/TMM.2010.2058095","volume":"12","author":"A Tawari","year":"2010","unstructured":"Tawari, A., Trievedi, M.: Speech emotion analysis: exploring the role of context. IEEE Trans Multimed 12(6), 502\u2013509 (2010)","journal-title":"IEEE Trans Multimed"},{"key":"257_CR36","doi-asserted-by":"crossref","first-page":"144","DOI":"10.1016\/j.specom.2007.01.001","volume":"49","author":"K Truong","year":"2007","unstructured":"Truong, K., van Leeuwen, D.: Automatic discrimination between laughter and speech. Speech Com 49, 144\u2013158 (2007)","journal-title":"Speech Com"},{"key":"257_CR37","doi-asserted-by":"crossref","unstructured":"Vildjiounaite, E., Kyll\u00f6nen, V., Vuorinen, O., M\u00e4kel\u00e4, S.-M., Ker\u00e4nen, T., Niiranen, M., Knuutinen, J., Peltola, J.: Requirements and software framework for adaptive multimodal affect recognition. In: Proceedings of the ACII 2009, pp. 1\u20137 (2009)","DOI":"10.1109\/ACII.2009.5349393"},{"issue":"12","key":"257_CR38","first-page":"1743","volume":"27","author":"A Vinciarelli","year":"2009","unstructured":"Vinciarelli, A., Pantic, M., Bourlard, H.: Social signal processing: survey of an emerging domain. IMAVIS 27(12), 1743\u20131759 (2009)","journal-title":"IMAVIS"},{"key":"257_CR39","doi-asserted-by":"crossref","unstructured":"Viola, P., Jones, M.: Rapid object detection using a boosted cascade of simple features. In: Proceedings of the CVPR 2001, vol. 1, pp. I-511\u2013I-518 (2001)","DOI":"10.1109\/CVPR.2001.990517"},{"key":"257_CR40","doi-asserted-by":"crossref","unstructured":"Vuorinen, O., Peltola, J., M\u00e4kel\u00e4, S.-M.: Unsupervised speaker change detection for mobile device recorded speech. In: Proceedings of the ICASSP 2007, vol. 2, pp. 757\u2013760 (2007)","DOI":"10.1109\/ICASSP.2007.366346"},{"issue":"4","key":"257_CR41","doi-asserted-by":"crossref","first-page":"206","DOI":"10.1109\/T-AFFC.2011.12","volume":"2","author":"J Wagner","year":"2011","unstructured":"Wagner, J., Andr\u00e9, E., Lingenfelser, F., Kim, J.: Exploring fusion methods for multimodal emotion recognition with missing data. IEEE Trans Affect Comput 2(4), 206\u2013218 (2011)","journal-title":"IEEE Trans Affect Comput"},{"key":"257_CR42","doi-asserted-by":"crossref","unstructured":"Wollmer, M., Metallinou, A., Eyben, F., Schuller, B., Narayanan, S.: Context-sensitive multimodal emotion recognition from speech and facial expression using bidirectional LSTM modeling. In: Proceedings of the Interspeech 2010, pp. 2362\u20132365 (2010)","DOI":"10.21437\/Interspeech.2010-646"},{"issue":"2","key":"257_CR43","doi-asserted-by":"crossref","first-page":"478","DOI":"10.1109\/TASL.2006.881692","volume":"15","author":"J Wu","year":"2007","unstructured":"Wu, J., Huo, Q.: A study of minimum classification error (MCE) linear regression for supervised adaptation of MCE-trained continuous-density hidden Markov models. IEEE Trans Audio Speech Lang Process 15(2), 478\u2013488 (2007)","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"5","key":"257_CR44","doi-asserted-by":"crossref","first-page":"665","DOI":"10.1109\/TKDE.2005.83","volume":"17","author":"Xingquan Zhu","year":"2005","unstructured":"Zhu, Xingquan, Xindong, Wu, Elmagarmid, A.K., Feng, Zhe, Lide, Wu: Video data mining: semantic indexing and event detection from the association perspective. IEEE Trans Knowl Data Eng 17(5), 665\u2013677 (2005)","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"257_CR45","doi-asserted-by":"crossref","unstructured":"Xiong, Z., Radhakrishnan, R., Divakaran, A., Huang, T.S.: Audio events detection based highlights extraction from baseball, golf and soccer games in a unified framework. In: Proceedings of the ICASSP 2003, vol. 5, pp. 632\u2013635 (2003)","DOI":"10.1109\/ICASSP.2003.1200049"},{"issue":"2","key":"257_CR46","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1352012.1352015","volume":"4","author":"M Xu","year":"2008","unstructured":"Xu, M., Xu, C., Duan, L., Jin, J.S., Luo, S.: Audio keywords generation for sports video analysis. ACM Trans Multimed Comp Commun Appl 4(2), 1\u201323 (2008)","journal-title":"ACM Trans Multimed Comp Commun Appl"},{"issue":"2","key":"257_CR47","doi-asserted-by":"crossref","first-page":"424","DOI":"10.1109\/TMM.2006.886310","volume":"9","author":"Z Zeng","year":"2007","unstructured":"Zeng, Z., Tu, J., Liu, M., Huang, T., Pianfetti, B., Roth, D., Levinson, S.: Audio-visual affect recognition. IEEE Trans Multimed 9(2), 424\u2013428 (2007)","journal-title":"IEEE Trans Multimed"},{"issue":"4","key":"257_CR48","doi-asserted-by":"crossref","first-page":"570","DOI":"10.1109\/TMM.2008.921737","volume":"10","author":"Z Zeng","year":"2008","unstructured":"Zeng, Z., Tu, J., Pianfetti, B., Huang, T.: Audio-visual affective expression recognition through multistream fused HMM. IEEE Trans Multimed 10(4), 570\u2013577 (2008)","journal-title":"IEEE Trans Multimed"},{"issue":"1","key":"257_CR49","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1109\/TPAMI.2008.52","volume":"31","author":"Z Zeng","year":"2009","unstructured":"Zeng, Z., Pantic, M., Roisman, G., Huang, T.: A survey of affect recognition methods: audio, visual, and spontaneous expressions. IEEE Trans PAMI 31(1), 39\u201358 (2009)","journal-title":"IEEE Trans PAMI"},{"key":"257_CR50","first-page":"611","volume":"2005","author":"D Zhang","year":"2005","unstructured":"Zhang, D., Gatica-Perez, D., Bengio, S., McCowan, I.: Semi-supervised meeting event recognition with adapted HMMs. Proc ICME 2005, 611\u2013618 (2005)","journal-title":"Proc ICME"},{"issue":"6","key":"257_CR51","doi-asserted-by":"crossref","first-page":"1167","DOI":"10.1109\/TMM.2007.902847","volume":"9","author":"G Zhu","year":"2007","unstructured":"Zhu, G., Huang, Q., Changsheng, X., Xing, L., Gao, W., Yao, H.: Human behavior analysis for highlight ranking in broadcast racket sports video. IEEE Trans Multimed 9(6), 1167\u20131182 (2007)","journal-title":"IEEE Trans Multimed"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-012-0257-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00530-012-0257-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-012-0257-1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,21]],"date-time":"2025-03-21T19:02:56Z","timestamp":1742583776000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00530-012-0257-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,2,26]]},"references-count":51,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2012,6]]}},"alternative-id":["257"],"URL":"https:\/\/doi.org\/10.1007\/s00530-012-0257-1","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"type":"print","value":"0942-4962"},{"type":"electronic","value":"1432-1882"}],"subject":[],"published":{"date-parts":[[2012,2,26]]}}}