{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T20:04:17Z","timestamp":1776888257305,"version":"3.51.2"},"publisher-location":"Boston, MA","reference-count":102,"publisher":"Springer US","isbn-type":[{"value":"9780387938073","type":"print"},{"value":"9780387938080","type":"electronic"}],"license":[{"start":{"date-parts":[[2010,1,1]],"date-time":"2010-01-01T00:00:00Z","timestamp":1262304000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2010,1,1]],"date-time":"2010-01-01T00:00:00Z","timestamp":1262304000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010]]},"DOI":"10.1007\/978-0-387-93808-0_40","type":"book-chapter","created":{"date-parts":[[2009,10,1]],"date-time":"2009-10-01T11:47:11Z","timestamp":1254397631000},"page":"1071-1116","source":"Crossref","is-referenced-by-count":17,"title":["Computers in the Human Interaction Loop"],"prefix":"10.1007","author":[{"given":"A.","family":"Waibel","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"R.","family":"Stiefelhagen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"R.","family":"Carlson","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"J.","family":"Casas","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"J.","family":"Kleindienst","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"L.","family":"Lamel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"O.","family":"Lanz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"D.","family":"Mostefa","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"M.","family":"Omologo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"F.","family":"Pianesi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"L.","family":"Polymenakos","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"G.","family":"Potamianos","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"J.","family":"Soldatos","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"G.","family":"Sutschet","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"J.","family":"Terken","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"40_CR1","volume-title":"UPC Audio, Video and Multimodal Person Tracking Systems in the CLEAR Evaluation Campaign. In: Multimodal Technologies for Perception of Humans, Proceedings of the First International CLEAR Evaluation Workshop","author":"A. Abad","year":"2006","unstructured":"Abad, A., Canton-Ferrer, C., Segura, C., Landabaso, J.L., Macho, D., Casas, J.R., Hernando, J., Pardas, M., Nadeu, C.: UPC Audio, Video and Multimodal Person Tracking Systems in the CLEAR Evaluation Campaign. In: Multimodal Technologies for Perception of Humans, Proceedings of the First International CLEAR Evaluation Workshop. Springer LNCS 4122, Southampton, UK (2006)"},{"key":"40_CR2","first-page":"1137","volume-title":"A compact model for speaker adaptation training. In: Proc. Int. Conf. Spoken Language Process. (ICSLP)","author":"T. Anastasakos","year":"1996","unstructured":"Anastasakos, T., McDonough, J., Schwartz, R., Makhoul, J.: A compact model for speaker adaptation training. In: Proc. Int. Conf. Spoken Language Process. (ICSLP), pp. 1137\u20131140. Philadelphia, PA (1996)"},{"key":"40_CR3","unstructured":"Andreou, A., Kamm, T., Cohen, J.: Experiments in vocal tract normalisation. In: Proc. CAIP Works.: Frontiers in Speech Recognition II (1994)"},{"issue":"7","key":"40_CR4","doi-asserted-by":"publisher","first-page":"2011","DOI":"10.1109\/TASL.2007.902460","volume":"15","author":"X. Anguera","year":"2007","unstructured":"Anguera, X., Wooters, C., Hernando, J.: Acoustic beamforming for speaker diarization of meetings. IEEE Trans. Audio Speech Language Process. 15(7), 2011\u20132022 (2007)","journal-title":"IEEE Trans. Audio Speech Language Process"},{"key":"40_CR5","unstructured":"Bales, R.F.: Interaction process analysis: a method for the study of small groups. University of Chicago press (1976)"},{"key":"40_CR6","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1111\/j.1540-4560.1948.tb01783.x","volume":"4","author":"K.D. Benne","year":"1948","unstructured":"Benne, K.D., Sheats, P.: Functional roles of group members. Journal of Social Issues 4 pp. 41\u201349 (1948)","journal-title":"Journal of Social Issues"},{"key":"40_CR7","first-page":"70","volume-title":"Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007","author":"K. Bernardin","year":"2007","unstructured":"Bernardin, K., Gehrig, T., Stiefelhagen, R.: Multi-Level Particle Filter Fusion of Features and Cues for Audio-Visual Person Tracking. In: Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS, vol. 4625, pp. 70\u201381. Springer, Baltimore, MD, USA (2007)"},{"key":"40_CR8","doi-asserted-by":"crossref","unstructured":"Bernardin, K., Stiefelhagen, R.: Evaluating multiple object tracking performance: The CLEAR MOT metrics. EURASIP Journal on Image and Video Processing, Special Issue on Video Tracking in Complex Scenes for Surveillance Applications (2008)","DOI":"10.1155\/2008\/246309"},{"key":"40_CR9","doi-asserted-by":"crossref","unstructured":"Beskow, J., Karlsson, I., Kewley, J., Salvi, G.: SYNFACE - A talking head telephone for the hearing-impaired, pp. 1178\u20131186. Springer-Verlag (2004)","DOI":"10.1007\/978-3-540-27817-7_173"},{"key":"40_CR10","doi-asserted-by":"crossref","unstructured":"Beskow, J., Nordenberg, M.: Data-driven synthesis of expressive visual speech using an mpeg-4 talking head. In: Proceedings of Interspeech 2005. Lisbon (2005)","DOI":"10.21437\/Interspeech.2005-376"},{"issue":"3","key":"40_CR11","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/34.910878","volume":"23","author":"A.F. Bobick","year":"2001","unstructured":"Bobick, A.F., Davis, J.W.: The recognition of human movement using temporal templates. IEEE Trans. on Pattern Analysis and Machine Intelligence 23(3), 257\u2013267 (2001)","journal-title":"IEEE Trans. on Pattern Analysis and Machine Intelligence"},{"key":"40_CR12","volume-title":"D.W.: Reflection: Turning experience into learning","year":"1988","unstructured":"Boud, D., Keogh, R., (Eds.), D.W.: Reflection: Turning experience into learning. Kogan Page, London (1988)"},{"key":"40_CR13","first-page":"55","volume-title":"A generative approach to audio-visual person tracking. In: Multimodal Technologies for Perception of Humans, Proceedings of the First International CLEAR Evaluation Workshop","author":"R. Brunelli","year":"2006","unstructured":"Brunelli, R., Brutti, A., Chippendale, P., Lanz, O., Omologo, M., Svaizer, P., Tobia, F.: A generative approach to audio-visual person tracking. In: Multimodal Technologies for Perception of Humans, Proceedings of the First International CLEAR Evaluation Workshop, pp. 55\u201368. Springer LNCS 4122, Southampton, UK (2006)"},{"key":"40_CR14","series-title":"Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS","volume-title":"A person tracking system for CHIL meetings. In: Multimodal Technologies for Perception of Humans","author":"A. Brutti","year":"2007","unstructured":"Brutti, A.: A person tracking system for CHIL meetings. In: Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS, vol. 4625. Springer, Baltimore, MD, USA (2007)"},{"key":"40_CR15","doi-asserted-by":"crossref","unstructured":"Callaway, C., Not, E., Stock, O.: Report generation for post-visit summaries in museum environments. In: O. Stock, M. Zancanaro (eds.). PEACH: Intelligent Interfaces for Museum Visits. Springer (2007)","DOI":"10.1007\/3-540-68755-6_4"},{"key":"40_CR16","unstructured":"Canton-Ferrer, C., Casas, J.R., Pard\u00e0s, M.: Human model and motion based 3D action recognition in multiple view scenarios (invited paper). In: 14th European Signal Processing Conference, EUSIPCO. EURASIP, University of Pisa, Florence, Italy (2006). ISBN: 0-387-34223-0"},{"key":"40_CR17","series-title":"Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS","first-page":"91","volume-title":"M.Pardas: Multi-person tracking strategies based on voxel analysis. In: Multimodal Technologies for Perception of Humans","author":"C. Canton-Ferrer","year":"2007","unstructured":"Canton-Ferrer, C., Salvador, J., Casas, J., M.Pardas: Multi-person tracking strategies based on voxel analysis. In: Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS, vol. 4625, pp. 91\u2013103. Springer, Baltimore, MD, USA (2007)"},{"key":"40_CR18","doi-asserted-by":"crossref","unstructured":"Canton-Ferrer, C., Segura, C., Casas, J.R., Pard\u00e0s, M., Hernando, J.: Audiovisual head orientation estimation with particle filters in multisensor scenarios. EURASIP Journal on Advances in Signal Processing (2007)","DOI":"10.1155\/2008\/276846"},{"key":"40_CR19","unstructured":"The CHIL technology catalogue. http:\/\/chil.server.de\/servlet\/is\/5777\/"},{"key":"40_CR20","unstructured":"Chippendale, P., Lanz, O.: Optimised meeting recording and annotation using real-time video analysis. In: Proc. 5th Joint Workshop on Machine Learning and Multimodal Interaction, MLMI08. Utrecht, The Netherlands (2008)"},{"key":"40_CR21","unstructured":"CLEAR \u2013 Classification of Events, Activities, and Relationships Evaluation and Workshop: http:\/\/www.clear-evaluation.org"},{"key":"40_CR22","unstructured":"The CLEF Website: http:\/\/www.clef-campaign.org\/"},{"key":"40_CR23","volume-title":"A context-aware virtual secretary in a smart office environment","author":"M. Danninger","year":"2008","unstructured":"Danninger, M., Stiefelhagen, R.: A context-aware virtual secretary in a smart office environment. In: Proceedings of the ACM Multimedia 2008. Vancouver, Canada (2008)"},{"issue":"4","key":"40_CR24","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"S. Davis","year":"1980","unstructured":"Davis, S., Mermelstein, P.: Comparison of parametric representations of monosyllabic word recognition in continuously spoken sentences. IEEE Trans. on Acoustics, Speech, and Signal Process. 28(4), 357\u2013366 (1980)","journal-title":"IEEE Trans. on Acoustics, Speech, and Signal Process"},{"key":"40_CR25","unstructured":"D2.2 functional requirements and chil cooperative information system software design, part 2, cooperative information system software design. Available on http:\/\/chil.server.de"},{"key":"40_CR26","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"A.P. Dempster","year":"1977","unstructured":"Dempster, A.P., Laird, M.M., Rubin, D.B.: Maximum likelihood from incomplete data via the EM algorithm. J. of the Royal Statistical Society Series B (methodological) 39, 1\u201338 (1977)","journal-title":"J. of the Royal Statistical Society Series B (methodological)"},{"issue":"4","key":"40_CR27","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1109\/MPRV.2008.75","volume":"7","author":"N. Dimakis","year":"2008","unstructured":"Dimakis, N., Soldatos, J., Polymenakos, L., Curin, J., Fleury, P., Kleindienst, J.: Integrated development of context-aware applications in smart spaces. IEEE Pervasive Computing 7(4), 71\u201379 (2008)","journal-title":"IEEE Pervasive Computing"},{"key":"40_CR28","volume-title":"Using the influence model to recognize functional roles in meetings","author":"W. Dong","year":"2007","unstructured":"Dong, W., Lepri, B., Cappelletti, A., Pentland, A., Pianesi, F., Zancanaro, M.: Using the influence model to recognize functional roles in meetings. In: Proceedings of the International Conference on Multimodal Interaction ICMI2007. Nagoya, Japan (2007)"},{"key":"40_CR29","doi-asserted-by":"crossref","unstructured":"Dourish, P.: The appropriation of interactive technologies: Some lessons from placeless documents. Computer Supported Cooperative Work (2003)","DOI":"10.1023\/A:1026149119426"},{"key":"40_CR30","volume-title":"How To Make Meetings Work","author":"M. Doyle","year":"1993","unstructured":"Doyle, M., Straus, D.: How To Make Meetings Work. The Berkley Publishing Group, New York, NY (1993)"},{"key":"40_CR31","first-page":"682","volume-title":"Pushy versus meek - using avatars to influence turn-taking behaviour","author":"J. Edlund","year":"2007","unstructured":"Edlund, J., Beskow, J.: Pushy versus meek - using avatars to influence turn-taking behaviour. In: Proceedings of Interspeech 2007 ICSLP, pp. 682\u2013685. Antwerp, Belgium (2007)"},{"issue":"8-9","key":"40_CR32","doi-asserted-by":"publisher","first-page":"630","DOI":"10.1016\/j.specom.2008.04.002","volume":"50","author":"J. Edlund","year":"2008","unstructured":"Edlund, J., Gustafson, J., Heldner, M., Hjalmarsson, A.: Towards human-like spoken dialogue systems. Speech Communication 50(8-9), 630\u2013645 (2008). URL http:\/\/www.speech.kth.se\/prod\/publications\/files\/3145.pdf","journal-title":"Speech Communication"},{"issue":"2-4","key":"40_CR33","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1159\/000090099","volume":"62","author":"J. Edlund","year":"2005","unstructured":"Edlund, J., Heldner, M.: Exploring prosody in interaction control. Phonetica 62(2-4), 215\u2013226 (2005)","journal-title":"Phonetica"},{"key":"40_CR34","unstructured":"Edlund, J., Heldner, M.: Underpinning \/nailon\/: automatic estimation of pitch range and speaker relative pitch. In: C. M\u00fcller (ed.) Speaker Classification. Springer\/LNAI (2007)"},{"key":"40_CR35","volume-title":"Analysis of local appearance-based face recognition: Effects of feature selection and feature normalization","author":"H.K. Ekenel","year":"2006","unstructured":"Ekenel, H.K., Stiefelhagen, R.: Analysis of local appearance-based face recognition: Effects of feature selection and feature normalization. In: CVPR Biometrics Workshop. New York, USA (2006)"},{"key":"40_CR36","unstructured":"ELRA Catalogue of Language Resources: http:\/\/catalog.elra.info"},{"key":"40_CR37","unstructured":"FIPA: The foundation for intelligent physical agents. http:\/\/www.fipa.org"},{"key":"40_CR38","first-page":"347","volume-title":"A post-processing system to yield reduced word error rates: Recogniser output voting error reduction (ROVER)","author":"J.G. Fiscus","year":"1997","unstructured":"Fiscus, J.G.: A post-processing system to yield reduced word error rates: Recogniser output voting error reduction (ROVER). In: Proc. Automatic Speech Recognition and Understanding Works. (ASRU), pp. 347\u2013352. Santa Barbara, CA (1997)"},{"key":"40_CR39","doi-asserted-by":"crossref","unstructured":"Fiscus, J.G., Ajot, J., Michel, M., Garofolo, J.S.: The Rich Transcription 2006 Spring meeting recognition evaluation. In: S. Renals, S. Bengio, J.G. Fiscus (eds.) Machine Learning for Multimodal Interaction, vol. 4299, pp. 309\u2013322. LNCS (2006)","DOI":"10.1007\/11965152_28"},{"key":"40_CR40","unstructured":"Fleury, P., Cu\u0159\u00edn, J., Kleindienst, J.: SitCom - development platform for multimodal perceptual services. In: Proceedings of the 3nd International Conference on Industrial Applications of Holonic and Multi-Agent Systems, pp. 106\u2013113. Regensburg, Germany (2007). V. Marik, V. Vyatkin, A.W. Colombo (Eds.): HoloMAS 2007, LNAI 4659"},{"issue":"2","key":"40_CR41","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1109\/89.279278","volume":"2","author":"J.L. Gauvain","year":"1994","unstructured":"Gauvain, J.L., Lee, C.: Maximum a Posteriori Estimation for Multivariate Gaussian Mixture Observations of Markov Chains. IEEE Trans. on Speech and Audio Processing 2(2), 291\u2013298 (1994). URL ftp:\/\/tlp.limsi.fr\/public\/map93.ps.Z","journal-title":"IEEE Trans. on Speech and Audio Processing"},{"key":"40_CR42","volume-title":"Tracking multiple speakers with probabilistic data association filters. In: Multimodal Technologies for Perception of Humans, Proceedings of the First International CLEAR Evaluation Workshop","author":"T. Gehrig","year":"2006","unstructured":"Gehrig, T., McDonough, J.: Tracking multiple speakers with probabilistic data association filters. In: Multimodal Technologies for Perception of Humans, Proceedings of the First International CLEAR Evaluation Workshop. Springer LNCS 4122, Southampton, UK (2006)"},{"key":"40_CR43","doi-asserted-by":"crossref","unstructured":"Gopinath, R.: Maximum likelihood modeling with Gaussian distributions for classification. In: Proc. Int. Conf. Acoustics Speech Signal Process. (ICASSP), pp. 661\u2013664. Seattle, WA (1998)","DOI":"10.1109\/ICASSP.1998.675351"},{"key":"40_CR44","doi-asserted-by":"crossref","unstructured":"Haeb-Umbach, R., Ney, H.: Linear discriminant analysis for improved large vocabulary continuous speech recognition. In: Proc. Int. Conf. Acoustics Speech Signal Process. (ICASSP), vol. 1, pp. 13\u201316 (1992)","DOI":"10.1109\/ICASSP.1992.225984"},{"key":"40_CR45","unstructured":"Heldner, M., Edlund, J., Carlson, R.: Interruption impossible. In: M. Horne, G. Bruce (eds.) Nordic Prosody: Proceedings of the IXth Conference, Lund 2004, pp. 97\u2013105. Peter Lang, Frankfurt am Main (2006)"},{"issue":"4","key":"40_CR46","doi-asserted-by":"publisher","first-page":"1738","DOI":"10.1121\/1.399423","volume":"87","author":"H. Hermansky","year":"1990","unstructured":"Hermansky, H.: Perceptual linear predictive (PLP) analysis of speech. J. Acoustic Society America 87(4), 1738\u20131752 (1990)","journal-title":"J. Acoustic Society America"},{"key":"40_CR47","doi-asserted-by":"crossref","unstructured":"Huang, J., Marcheret, E., Visweswariah, K.: Improving speaker diarization for CHIL lecture meetings. In: Proc. Interspeech, pp. 1865\u20131868. Antwerp, Belgium (2007)","DOI":"10.21437\/Interspeech.2007-519"},{"key":"40_CR48","series-title":"Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS","first-page":"497","volume-title":"The IBM RT07 evaluation systems for speaker diarization on lecture meetings","author":"J. Huang","year":"2007","unstructured":"Huang, J., Marcheret, E., Visweswariah, K., Potamianos, G.: The IBM RT07 evaluation systems for speaker diarization on lecture meetings. In: Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS, vol. 4625, pp. 497\u2013508. Springer, Baltimore, MD, USA (2007)"},{"key":"40_CR49","unstructured":"Hugot, V.: Eye gaze analysis in human-human communication. Master thesis, KTH Speech, Music and Hearing (2007)"},{"key":"40_CR50","doi-asserted-by":"publisher","first-page":"852","DOI":"10.1109\/34.868686","volume":"22","author":"Y.A. Ivanov","year":"2000","unstructured":"Ivanov, Y.A., Bobick., A.F.: Recognition of visual activities and interactions by stochastic parsing. IEEE Transactions on Pattern Analysis and Machine Intelligence 22, 852\u2013872 (2000)","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"40_CR51","unstructured":"JADE: Java Agent DEvelopent Framework. http:\/\/jade.tilab.com"},{"key":"40_CR52","series-title":"Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS","first-page":"35","volume-title":"The AIT 3D audio \/ visual person tracker for CLEAR 2007. In: Multimodal Technologies for Perception of Humans","author":"N. Katsarakis","year":"2007","unstructured":"Katsarakis, N., Talantzis, F., Pnevmatikakis, A., Polymenakos, L.: The AIT 3D audio \/ visual person tracker for CLEAR 2007. In: Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS, vol. 4625, pp. 35\u201346. Springer, Baltimore, MD, USA (2007)"},{"key":"40_CR53","volume-title":"The Wisdom of Teams. Creating the High Performance Organisations","author":"J. Katznbach","year":"1993","unstructured":"Katznbach, J., Smith, D.: The Wisdom of Teams. Creating the High Performance Organisations. Harvard Business School Press, Cambridge, MA (1993)"},{"key":"40_CR54","doi-asserted-by":"crossref","unstructured":"Klee, U., Gehrig, T., McDonough, J.: Kalman filters for time delay of arrival-based source localization. Journal of Advanced Signal Processing, Special Issue on Multi-Channel Speech Processing (2006)","DOI":"10.1155\/ASP\/2006\/12378"},{"key":"40_CR55","unstructured":"Kray, C., Wasinger, R., Kortuem, G.: Concepts and issues in interfaces for multiple users and multiple devices. In: Proceedings of the Workshop on Multi-User and Ubiquitous User Interfaces (MU3I), IUI\/CADUI (2004)"},{"key":"40_CR56","doi-asserted-by":"crossref","unstructured":"Kruger, R., Carpendale, M., Scott, S., Tang, A.: Fluid integration of rotation and translation. In: Proceedings of the ACM Conference on Human Factors in Computing Systems (CHI 2005). Portland, Oregon (2005)","DOI":"10.1145\/1054972.1055055"},{"key":"40_CR57","doi-asserted-by":"crossref","unstructured":"Kulyk, O., Wang, C., Terken, J.: Real-time feedback based on nonverbal behaviour to enhance social dynamics in small group meetings. In: MLMI\u201905: Proceedings of the Joint Workshop on Multimodal Interaction and Related Machine Learning Algorithms, LNCS, vol. 3869, pp. 150\u2013161 (2006)","DOI":"10.1007\/11677482_13"},{"key":"40_CR58","doi-asserted-by":"crossref","unstructured":"Landabaso, J.L., M. Pardas, M.: Foreground regions extraction and characterization towards real-time object tracking. In: Machine Learning for Multimodal Interaction (MLMI), vol. 3869, pp. 241\u2013249. Springer LNCS (2006)","DOI":"10.1007\/11677482_21"},{"issue":"9","key":"40_CR59","doi-asserted-by":"publisher","first-page":"1436","DOI":"10.1109\/TPAMI.2006.177","volume":"28","author":"O. Lanz","year":"2006","unstructured":"Lanz, O.: Approximate Bayesian Multibody Tracking. IEEE Transactions on Pattern Analysis and Machine Intelligence 28(9), 1436\u20131449 (2006)","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"40_CR60","doi-asserted-by":"crossref","unstructured":"Lanz, O., Brunelli, R.: Dynamic head location and pose from video. In: IEEE Conf. Multisensor Fusion and Integration (2006)","DOI":"10.1109\/MFI.2006.265661"},{"key":"40_CR61","series-title":"Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS","volume-title":"An appearance-based particle filter for visual tracking in smart rooms. In: Multimodal Technologies for Perception of Humans","author":"O. Lanz","year":"2007","unstructured":"Lanz, O., Chippendale, P., Brunelli, R.: An appearance-based particle filter for visual tracking in smart rooms. In: Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS, vol. 4625, pp. 57\u201369. Springer, Baltimore, MD, USA (2007)"},{"issue":"2","key":"40_CR62","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1006\/csla.1995.0010","volume":"9","author":"C.J. Leggetter","year":"1995","unstructured":"Leggetter, C.J., Woodland, P.C.: Maximum likelihood linear regression for speaker adaptation of continuous density hidden Markov models. Computer Speech and Language 9(2), 171\u2013185 (1995)","journal-title":"Computer Speech and Language"},{"key":"40_CR63","series-title":"Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS","first-page":"543","volume-title":"Speaker diarization for conference room: The UPC RT07\u2009s evaluation system","author":"J. Luque","year":"2007","unstructured":"Luque, J., Anguera, X., Temko, A., Hernando, J.: Speaker diarization for conference room: The UPC RT07\u2009s evaluation system. In: Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS, vol. 4625, pp. 543\u2013554. Springer, Baltimore, MD, USA (2007)"},{"key":"40_CR64","doi-asserted-by":"crossref","unstructured":"Morris, M., Piper, A., Cassanego, A., Huang, A., Paepcke, A., Winograd, T.: Mediating group dynamics through tabletop interface design. IEEE Computer Graphics and Applications pp. 65\u201373 (2006)","DOI":"10.1109\/MCG.2006.114"},{"key":"40_CR65","doi-asserted-by":"crossref","unstructured":"M.Voit, R.Stiefelhagen: Tracking head pose and focus of attention with multiple far-field cameras. In: International Conference On Multimodal Interfaces - ICMI 2006. Banff, Canada (2006)","DOI":"10.1145\/1180995.1181050"},{"key":"40_CR66","volume-title":"An audio-visual particle filter for speaker tracking on the CLEAR\u201906 evaluation dataset. In: Multimodal Technologies for Perception of Humans, Proceedings of the First International CLEAR Evaluation Workshop","author":"K. Nickel","year":"2006","unstructured":"Nickel, K., Gehrig, T., Ekenel, H.K., McDonough, J., Stiefelhagen, R.: An audio-visual particle filter for speaker tracking on the CLEAR\u201906 evaluation dataset. In: Multimodal Technologies for Perception of Humans, Proceedings of the First International CLEAR Evaluation Workshop. Springer LNCS 4122, Southampton, UK (2006)"},{"key":"40_CR67","doi-asserted-by":"crossref","unstructured":"Nickel, K., Gehrig, T., Stiefelhagen, R., McDonough, J.: A Joint Particle Filter for Audio-visual Speaker Tracking. In: Proceedings of the Seventh International Conference On Multimodal Interfaces - ICMI 2005, pp. 61\u201368. ACM Press (2005)","DOI":"10.1145\/1088463.1088477"},{"key":"40_CR68","unstructured":"The NIST MarkIII Microphone Array: http:\/\/www.nist.gov\/smartspace\/mk3_presentation.html"},{"key":"40_CR69","first-page":"3","volume":"41","author":"F. Pianesi","year":"2007","unstructured":"Pianesi, F., Zancanaro, M., Lepri, B., Cappelletti, A.: A multimodal annotated corpus of consensus decision making meetings. The Journal of Language Resources and Evaluation 41(3\u20134) (2007)","journal-title":"The Journal of Language Resources and Evaluation"},{"key":"40_CR70","doi-asserted-by":"crossref","unstructured":"Pianesi, F., Zancanaro, M., Not, E., Leonardi, C., Falcon, V., Lepri, B.: Multimodal support to group dynamics. Personal and Ubiquitous Computing 12(2) (2008)","DOI":"10.1007\/s00779-007-0144-5"},{"key":"40_CR71","unstructured":"Povey, D., Woodland, P.: Improved discriminative training techniques for large vocabulary continuous speech recognition. In: Proc. Int. Conf. Acoustics Speech Signal Process. (ICASSP). Salt Lake City, UT (2001)"},{"key":"40_CR72","doi-asserted-by":"crossref","unstructured":"Povey, D., Woodland, P.C.: Minimum phone error and I-smoothing for improved discriminative training. In: Proc. Int. Conf. Acoustics Speech Signal Process. (ICASSP), pp. 105\u2013108. Orlando, FL (2002)","DOI":"10.1109\/ICASSP.2002.1005687"},{"key":"40_CR73","doi-asserted-by":"crossref","unstructured":"Rentzeperis, E., Stergiou, A., Boukis, C., Pnevmatikakis, A., Polymenakos, L.C.: The 2006 Athens Information Technology speech activity detection and speaker diarization systems. In: Machine Learning for Multimodal Interaction, vol. 4299, pp. 385\u2013395. LNCS (2006)","DOI":"10.1007\/11965152_34"},{"key":"40_CR74","unstructured":"The Rich Transcription 2006 Spring Meeting Recognition Evaluation Website: http:\/\/www.nist.gov\/speech\/tests\/rt\/2006-spring"},{"key":"40_CR75","unstructured":"Rich Transcription 2007 Meeting Recognition Evaluation. http:\/\/www.nist.gov\/speech\/tests\/rt\/2007"},{"key":"40_CR76","doi-asserted-by":"crossref","unstructured":"Schwenk, H.: Efficient training of large neural networks for language modeling. In: IJCNN, pp. 3059\u20133062 (2004)","DOI":"10.1109\/IJCNN.2004.1381158"},{"key":"40_CR77","series-title":"Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS","first-page":"82","volume-title":"Multispeaker localization and tracking in intelligent environments","author":"C. Segura","year":"2007","unstructured":"Segura, C., Abad, A., Nadeu, C., Hernando, J.: Multispeaker localization and tracking in intelligent environments. In: Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS, vol. 4625, pp. 82\u201390. Springer, Baltimore, MD, USA (2007)"},{"key":"40_CR78","doi-asserted-by":"crossref","unstructured":"Sellen, A., Harper, R.: The Myth of the Paperless Office. MIT Press (2001)","DOI":"10.7551\/mitpress\/4833.001.0001"},{"key":"40_CR79","doi-asserted-by":"crossref","unstructured":"Shen, C., Vernier, F., Forlines, C., Ringel, M.: Diamondspin: An extensible toolkit for around-the-table interaction. In: ACM Conference on Human Factors in Computing Systems (CHI) (2004)","DOI":"10.1145\/985692.985714"},{"key":"40_CR80","unstructured":"Siciliano, C., Williams, G., Beskow, J., Faulkner, A.: Evaluation of a multilingual synthetic talking face as a communication aid for the hearing impaired. In: Proc of ICPhS, XV Intl Conference of Phonetic Sciences, pp. 131\u2013134. Barcelona, Spain (2003)"},{"key":"40_CR81","doi-asserted-by":"crossref","unstructured":"Skantze, G., House, D., Edlund, J.: User responses to prosodic variation on fragmentary grounding utterances in dialogue. In: Proceedings Interspeech 2006, pp. 2002\u20132005. Pittsburgh, PA (2006)","DOI":"10.21437\/Interspeech.2006-548"},{"key":"40_CR82","unstructured":"SmarTrack - a SmarT people Tracker. Patent pending. Online at http:\/\/tev.fbk.eu\/smartrack\/"},{"issue":"2","key":"40_CR83","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1007\/s00779-006-0102-7","volume":"11","author":"J. Soldatos","year":"2007","unstructured":"Soldatos, J., Dimakis, N., Stamatis, K., Polymenakos, L.: A Breadboard Architecture for Pervasive Context-Aware Services in Smart Spaces: Middleware Components and Prototype Applications. Personal and Ubiquitous Computing Journal 11(2), 193\u2013212 (2007). URL http:\/\/www.springerlink.com\/content\/j14821834364128w\/","journal-title":"Personal and Ubiquitous Computing Journal"},{"key":"40_CR84","first-page":"1","volume-title":"The CLEAR 2006 Evaluation. In: Multimodal Technologies for Perception of Humans, Proceedings of the First International CLEAR Evaluation Workshop, CLEAR 2006, no. 4122","author":"R. Stiefelhagen","year":"2006","unstructured":"Stiefelhagen, R., Bernardin, K., Bowers, R., Garofolo, J., Mostefa, D., Soundararajan, P.: The CLEAR 2006 Evaluation. In: Multimodal Technologies for Perception of Humans, Proceedings of the First International CLEAR Evaluation Workshop, CLEAR 2006, no. 4122 in Springer LNCS, pp. 1\u201345. Southampton, UK (2006)"},{"key":"40_CR85","series-title":"Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS","doi-asserted-by":"crossref","DOI":"10.21236\/ADA521073","volume-title":"The CLEAR 2007 Evaluation","author":"R. Stiefelhagen","year":"2007","unstructured":"Stiefelhagen, R., Bernardin, K., Bowers, R., Rose, R.T., Michel, M., Garofolo, J.: The CLEAR 2007 Evaluation. In: Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS, vol. 4625, pp. 3\u201334. Springer, Baltimore, MD, USA (2007)"},{"key":"40_CR86","doi-asserted-by":"crossref","unstructured":"Stiefelhagen, R., Bernardin, K., Ekenel, H., McDonough, J., Nickel, K., Voit, M., Woelfel, M.: Audio-visual perception of a lecturer in a smart seminar room. Signal Processing - Special Issue on Multimodal Interfaces 86(12) (2006)","DOI":"10.1016\/j.sigpro.2006.02.043"},{"key":"40_CR87","volume-title":"Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS","year":"2007","unstructured":"Stiefelhagen, R., Bowers, R., Fiscus, J. (eds.): Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS, vol. 4625. Springer, Baltimore, MD, USA (2007)"},{"key":"40_CR88","volume-title":"Multimodal Technologies for Perception of Humans, First International Evaluation Workshop on Classification of Events, Activities and Relationships, CLEAR\u201906","year":"2006","unstructured":"Stiefelhagen, R., Garofolo, J. (eds.): Multimodal Technologies for Perception of Humans, First International Evaluation Workshop on Classification of Events, Activities and Relationships, CLEAR\u201906. No. 4122 in LNCS. Springer, Southampton, UK (2006)"},{"key":"40_CR89","first-page":"263","volume-title":"Influencing social dynamics in meetings through a peripheral display. In: ICMI \u201907: Proceedings of the 9th international conference on Multimodal interfaces","author":"J. Sturm","year":"2007","unstructured":"Sturm, J., van Herwijnen, O.H., Eyck, A., Terken, J.: Influencing social dynamics in meetings through a peripheral display. In: ICMI \u201907: Proceedings of the 9th international conference on Multimodal interfaces, pp. 263\u2013270. ACM, New York, NY, USA (2007)"},{"key":"40_CR90","doi-asserted-by":"crossref","unstructured":"Svanfeldt, G., Olszewski, D.: Perception experiment combining a parametric loudspeaker and a synthetic talking head. In: Proceedings of Interspeech, pp. 1721\u20131724 (2005)","DOI":"10.21437\/Interspeech.2005-283"},{"issue":"2","key":"40_CR91","doi-asserted-by":"publisher","first-page":"143","DOI":"10.1016\/0020-7373(91)90039-A","volume":"34","author":"J.C. Tang","year":"1991","unstructured":"Tang, J.C.: Finding from observational studies of collaborative work. International Journal of Man-Machine Studies 34(2), 143\u2013160 (1991)","journal-title":"International Journal of Man-Machine Studies"},{"key":"40_CR92","doi-asserted-by":"crossref","unstructured":"Tyagi, A., Potamianos, G., Davis, J.W., Chu, S.M.: Fusion of multiple camera views for kernel-based 3D tracking. In: Proc. IEEE Works. Motion and Video Computing (WMVC). Austin, Texas (2007)","DOI":"10.1109\/WMVC.2007.15"},{"key":"40_CR93","unstructured":"VACE - Video Analysis and Content Extraction, http:\/\/iris.usc.edu\/Outlines\/vace\/vace.html"},{"key":"40_CR94","doi-asserted-by":"crossref","unstructured":"Waibel, A., Stiefelhagen, R. (eds.): Computers in the Human Interaction Loop. Human-Computer Interaction. Springer (2009)","DOI":"10.1007\/978-1-84882-054-8"},{"key":"40_CR95","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1007\/11768029_19","volume-title":"Proceedings of Perception and Interactive Technologies","author":"\u00c5. Wallers","year":"2006","unstructured":"Wallers, \u00c5., Edlund, J., Skantze, G.: The effects of prosodic features on the interpretation of synthesised backchannels. In: E. Andr\u00e9, L. Dybkjaer, W. Minker, H. Neumann, M. Weber (eds.) Proceedings of Perception and Interactive Technologies, pp. 183\u2013187. Springer, Kloster Irsee, Germany (2006)"},{"key":"40_CR96","doi-asserted-by":"crossref","unstructured":"Wojek, C., Nickel, K., Stiefelhagen, R.: Activity recognition and room level tracking in an office environment. In: IEEE Int. Conference on Multisensor Fusion and Integration for Intelligent Systems. Heidelberg, Germany (2006)","DOI":"10.1109\/MFI.2006.265608"},{"key":"40_CR97","unstructured":"W\u00f6lfel, M.: Warped-twice minimum variance distortionless response spectral estimation. In: Proc. EUSIPCO (2006)"},{"key":"40_CR98","doi-asserted-by":"crossref","unstructured":"W\u00f6lfel, M., McDonough, J.: Combining multi-source far distance speech recognition strategies: Beamforming, blind channel and confusion network combination. In: Proc. Interspeech (2005)","DOI":"10.21437\/Interspeech.2005-270"},{"key":"40_CR99","doi-asserted-by":"crossref","unstructured":"Zancanaro, M., Lepri, B., Pianesi, F.: Automatic detection of group functional roles in face to face interactions. In: Proceedings of the International Conference of Multimodal Interfaces ICMI-06 (2006)","DOI":"10.1145\/1180995.1181003"},{"key":"40_CR100","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Potamianos, G., Senior, A.W., Huang, T.S.: Joint face and head tracking inside multi-camera smart rooms. Signal, Image and Video Processing pp. 163\u2013178 (2007)","DOI":"10.1007\/s11760-007-0018-3"},{"key":"40_CR101","doi-asserted-by":"crossref","unstructured":"Zhu, X., Barras, C., Lamel, L., Gauvain, J.L.: Speaker diarization: from Broadcast News to lectures. In: Machine Learning for Multimodal Interaction, vol. 4299, pp. 396\u2013406. LNCS (2006)","DOI":"10.1007\/11965152_35"},{"key":"40_CR102","series-title":"Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS","first-page":"533","volume-title":"Multi-stage speaker diarization for conference and lecture meetings","author":"X. Zhu","year":"2007","unstructured":"Zhu, X., Barras, C., Lamel, L., Gauvain, J.L.: Multi-stage speaker diarization for conference and lecture meetings. In: Multimodal Technologies for Perception of Humans, Proceedings of the International Evaluation Workshops CLEAR 2007 and RT 2007, LNCS, vol. 4625, pp. 533\u2013542. Springer, Baltimore, MD, USA (2007)"}],"container-title":["Handbook of Ambient Intelligence and Smart Environments"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-0-387-93808-0_40","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,12]],"date-time":"2025-02-12T13:43:26Z","timestamp":1739367806000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-0-387-93808-0_40"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010]]},"ISBN":["9780387938073","9780387938080"],"references-count":102,"URL":"https:\/\/doi.org\/10.1007\/978-0-387-93808-0_40","relation":{},"subject":[],"published":{"date-parts":[[2010]]}}}