{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T19:53:53Z","timestamp":1760385233609,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":20,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642314780"},{"type":"electronic","value":"9783642314797"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-31479-7_14","type":"book-chapter","created":{"date-parts":[[2012,8,9]],"date-time":"2012-08-09T04:10:15Z","timestamp":1344485415000},"page":"72-80","source":"Crossref","is-referenced-by-count":14,"title":["Look at Who\u2019s Talking: Voice Activity Detection by Automated Gesture Analysis"],"prefix":"10.1007","author":[{"given":"Marco","family":"Cristani","sequence":"first","affiliation":[]},{"given":"Anna","family":"Pesarin","sequence":"additional","affiliation":[]},{"given":"Alessandro","family":"Vinciarelli","sequence":"additional","affiliation":[]},{"given":"Marco","family":"Crocco","sequence":"additional","affiliation":[]},{"given":"Vittorio","family":"Murino","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"issue":"2","key":"14_CR1","doi-asserted-by":"crossref","first-page":"356","DOI":"10.1109\/TASL.2011.2125954","volume":"20","author":"X. Anguera","year":"2011","unstructured":"Anguera, X., Bozonnet, S., Evans, N., Fredouille, C., Friedland, G., Vinyals, O.: Speaker diarization: A review of recent research. IEEE Transactions on Audio, Speech, and Language Processing 20(2), 356\u2013370 (2011)","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"14_CR2","first-page":"97","volume":"17","author":"P. Boersma","year":"1993","unstructured":"Boersma, P.: Accurate short term analysis of the fundamental frequency and the harmonics to noise ratio of a sampled sound. IEEE Transactions on Image Processing\u00a017, 97\u2013110 (1993)","journal-title":"IEEE Transactions on Image Processing"},{"issue":"9\/10","key":"14_CR3","first-page":"341","volume":"5","author":"P. Boersma","year":"2001","unstructured":"Boersma, P.: Praat, a system for doing phonetics by computer. Glot International\u00a05(9\/10), 341\u2013345 (2001)","journal-title":"Glot International"},{"doi-asserted-by":"crossref","unstructured":"Cassell, J., Steedman, M., Badler, N., Pelachaud, C., Stone, M., Douville, B., Prevost, S., Achorn, B.: Modeling the interaction between speech and gesture. In: Proceedings of the Sixteenth Annual Conference of the Cognitive Science Society, pp. 153\u2013158 (1994)","key":"14_CR4","DOI":"10.21236\/ADA290549"},{"doi-asserted-by":"crossref","unstructured":"Cristani, M., Bazzani, L., Paggetti, G., Fossati, A., Bue, A.D., Menegaz, G., Murino, V.: Social interaction discovery by statistical analysis of f-formations. In: Proceedings of the British Machine Vision Conference (2011)","key":"14_CR5","DOI":"10.5244\/C.25.23"},{"unstructured":"Fisher, J.W., Freeman, W.T., Darrell, T., Viola, P.: Learning joint statistical models for audio-visual fusion and segregation. In: Advanced in Neural Inf. Process. Syst., vol.\u00a013, pp. 772\u2013778 (2001)","key":"14_CR6"},{"doi-asserted-by":"crossref","unstructured":"Hung, H., Ba, S.O.: Speech\/non-speech detection in meetings from automatically extracted low resolution visual features. In: ICASSP, pp. 830\u2013833 (2010)","key":"14_CR7","DOI":"10.1109\/ICASSP.2010.5494913"},{"doi-asserted-by":"crossref","unstructured":"Hung, H., Huang, Y., Yeo, C., Gatica-Perez, D.: Associating audio-visual activity cues in a dominance estimation framework. In: First IEEE Workshop on CVPR for Human Communicative Behavior Analysis (2008)","key":"14_CR8","DOI":"10.1109\/CVPRW.2008.4563178"},{"doi-asserted-by":"crossref","unstructured":"Kendon, A.: Gesticulation and speech: Two aspects of the process of utterance. In: The Relationship of Verbal and Nonverbal Communication, pp. 207\u2013227 (1980)","key":"14_CR9","DOI":"10.1515\/9783110813098.207"},{"doi-asserted-by":"crossref","unstructured":"Kendon, A.: Language and gesture: unity or duality?, pp. 47\u201363. Cambridge University Press (2000)","key":"14_CR10","DOI":"10.1017\/CBO9780511620850.004"},{"key":"14_CR11","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511807572","volume-title":"Gesture: Visible Action as Utterance","author":"A. Kendon","year":"2004","unstructured":"Kendon, A.: Gesture: Visible Action as Utterance. Cambridge University Press, Cambridge (2004)"},{"unstructured":"Khondaker, A., Ghulam, M.: Improved noise reduction with pitch enabled voice activity detection. In: ISIVC 2008 (2008)","key":"14_CR12"},{"issue":"1","key":"14_CR13","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1002\/cav.6","volume":"15","author":"S. Kopp","year":"2004","unstructured":"Kopp, S., Wachsmuth, I.: Synthesizing multimodal utterances for conversational agents. Computer Animation and Virtual Worlds\u00a015(1), 39\u201352 (2004)","journal-title":"Computer Animation and Virtual Worlds"},{"key":"14_CR14","volume-title":"Hand and mind: What gestures reveal about thought","author":"D. McNeill","year":"1992","unstructured":"McNeill, D.: Hand and mind: What gestures reveal about thought. Chicago University Press, Chicago (1992)"},{"unstructured":"Noulas, A., Englebienne, G., Krose, B.J.A.: Multimodal speaker diarization. IEEE Transactions on Pattesrnss Analysis and Machine Intelligence\u00a099 (2011)","key":"14_CR15"},{"doi-asserted-by":"crossref","unstructured":"Rao, R., Chen, T.: Cross-modal prediction in audio-visual communication. In: IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP-1996, vol.\u00a04, pp. 2056\u20132059 (1996)","key":"14_CR16","DOI":"10.1109\/ICASSP.1996.545722"},{"unstructured":"Siracusa, M.R., John, W.F.: Dynamic dependency tests: Analysis and applications to multi-modal data association (2007)","key":"14_CR17"},{"doi-asserted-by":"crossref","unstructured":"Vajaria, H., Islam, T., Sarkar, S., Sankar, R., Kasturi, R.: Audio segmentation and speaker localization in meeting videos. In: 18th International Conference on Pattern Recognition, ICPR 2006, vol.\u00a02, pp. 1150\u20131153 (2006)","key":"14_CR18","DOI":"10.1109\/ICPR.2006.283"},{"doi-asserted-by":"crossref","unstructured":"Vinciarelli, A., Pantic, M., Heylen, D., Pelachaud, C., Poggi, I., D\u2019Errico, F., Schr\u00f6der, M.: Bridging the gap between social animal and unsocial machine: A survey of social signal processing. IEEE Transactions on Affective Computing (2011) (to appear)","key":"14_CR19","DOI":"10.1109\/T-AFFC.2011.27"},{"issue":"3","key":"14_CR20","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1207\/s15324834basp0103_2","volume":"1","author":"G. Wells","year":"1980","unstructured":"Wells, G., Petty, R.: The e_ects of over head movements on persuasion. Basic and Applied Social Psychology\u00a01(3), 219\u2013230 (1980)","journal-title":"Basic and Applied Social Psychology"}],"container-title":["Communications in Computer and Information Science","Constructing Ambient Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-31479-7_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T18:27:58Z","timestamp":1743964078000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-31479-7_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642314780","9783642314797"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-31479-7_14","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2012]]}}}