{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T19:10:23Z","timestamp":1685387423145},"reference-count":54,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2010,1,23]],"date-time":"2010-01-23T00:00:00Z","timestamp":1264204800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2011,2]]},"DOI":"10.1007\/s11042-009-0436-z","type":"journal-article","created":{"date-parts":[[2010,1,22]],"date-time":"2010-01-22T09:16:49Z","timestamp":1264151809000},"page":"1035-1067","source":"Crossref","is-referenced-by-count":0,"title":["Utilizing gestures to improve sentence boundary detection"],"prefix":"10.1007","volume":"51","author":[{"given":"Lei","family":"Chen","sequence":"first","affiliation":[]},{"given":"Mary P.","family":"Harper","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2010,1,23]]},"reference":[{"key":"436_CR1","volume-title":"Bodily communication","author":"M Argyle","year":"1988","unstructured":"Argyle M (1988) Bodily communication, 2nd edn. Methuen, London","edition":"2"},{"key":"436_CR2","unstructured":"Beeferman D, Berger A, Lafferty J (1998) Cyperpunc: a lightweight punctuation annotation system for speech. In: Proceedings of the international conference of acoustics, speech, and signal processing (ICASSP)"},{"key":"436_CR3","first-page":"39","volume":"22","author":"A Berger","year":"1996","unstructured":"Berger A, Pietra S, Pietra V (1996) A maximum entropy approach to natural language processing. Comput Linguist 22:39\u201372","journal-title":"Comput Linguist"},{"issue":"2","key":"436_CR4","first-page":"123","volume":"24","author":"L Breiman","year":"1996","unstructured":"Breiman L (1996) Bagging predictors. Mach Learn 24(2):123\u2013140","journal-title":"Mach Learn"},{"key":"436_CR5","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1007\/BF01889584","volume":"2","author":"W Buntine","year":"1992","unstructured":"Buntine W (1992) Learning classification trees. Stat Comput 2:63\u201373","journal-title":"Stat Comput"},{"key":"436_CR6","unstructured":"Cassell J, Stone M (1999) Living hand to mouth: psychological theories about speech and gesture in interactive dialogue systems. In: Proceedings of the AAAI conference on artificial intelligence"},{"key":"436_CR7","doi-asserted-by":"crossref","unstructured":"Chai J, Hong P, Zhou M (2004) A probabilistic approach to reference resolution in multimodal user interfaces. In: Proceedings of the conference on intelligent user interface (IUI). ACM Press, pp 70\u201377","DOI":"10.1145\/964442.964457"},{"key":"436_CR8","doi-asserted-by":"crossref","unstructured":"Chen C (1999) Speech recognition with automatic punctuation. In: Proceedings of the European conference on speech processing (EuroSpeech)","DOI":"10.21437\/Eurospeech.1999-115"},{"key":"436_CR9","doi-asserted-by":"crossref","unstructured":"Chen L, Harper M, Huang Z (2006) Using maximum entropy (ME) model to incorporate gesture cues for SU detection. In: Proceedings of the international conference on multimodal interface (ICMI), Banff, Canada","DOI":"10.1145\/1180995.1181035"},{"key":"436_CR10","unstructured":"Chen L, Liu Y, Harper M, Shriberg E (2004) Multimodal model integration for sentence unit detection. In: Proceedings of the international conference on multimodal interface (ICMI), University Park, PA"},{"key":"436_CR11","doi-asserted-by":"crossref","unstructured":"Chen L, Rose T, Qiao Y, Kimbara I, Parrill F, Welji H, Xu T, Tu J, Huang Z, Harper M, Quek F, Xiong Y, McNeill D, Tuttle R, Huang TS (2005) VACE multimodal meeting corpus. In: Proceedings of the joint workshop on machine learning and multimodal interaction (MLMI)","DOI":"10.1007\/11677482_4"},{"key":"436_CR12","doi-asserted-by":"crossref","unstructured":"Chen S, Rosenfeld R (1999) A gaussian prior for smoothing maximum entropy models. Tech. rep., Carnegie Mellon University","DOI":"10.21236\/ADA360974"},{"key":"436_CR13","unstructured":"EARS (2002) DARPA EARS Program. http:\/\/projects.ldc.upenn.edu\/EARS\/"},{"key":"436_CR14","doi-asserted-by":"crossref","unstructured":"Eisenstein J, Davis R (2005) Gestural cues for sentence segmentation. MIT AI Memo","DOI":"10.1145\/1027933.1028002"},{"key":"436_CR15","doi-asserted-by":"crossref","unstructured":"Eisenstein J, Davis R (2006) Gesture improves coreference resolution. In: Proceedings of the conference of the North American chapter of the association for computational linguistics (NAACL)","DOI":"10.3115\/1614049.1614059"},{"key":"436_CR16","unstructured":"Eisenstein J, Davis R (2007) Conditional modality fusion for coreference resolution. In: Proceedings of the conference of annual meeting on association for computational linguistics linguistics (ACL)"},{"key":"436_CR17","first-page":"390","volume-title":"Affect, cognition and personality","author":"P Ekman","year":"1965","unstructured":"Ekman P (1965) Communication through nonverbal behavior: a source of information about an interpersonal relationship. In: Tomkinds SS, Izard CE (eds) Affect, cognition and personality. Springer, New York, pp 390\u2013442"},{"key":"436_CR18","doi-asserted-by":"crossref","unstructured":"Esposito A, McCullough K, Quek F (2001) Disfluencies in gesture: gestural correlates to speech silent and filled pauses. In: Proceeding of IEEE workshop on cues in communication, Kauai,Hawaii","DOI":"10.21437\/ICSLP.2002-187"},{"key":"436_CR19","first-page":"87","volume":"8","author":"U Fayyad","year":"1992","unstructured":"Fayyad U, Irani K (1992) On the handling of continuous-valued attributes in decision tree generation. Mach Learn 8:87\u2013102","journal-title":"Mach Learn"},{"key":"436_CR20","unstructured":"Garofolo J, Laprum C, Michel M, Stanford V, Tabassi E (2004) The NIST meeting room pilot corpus. In: Proceedings of the conference on language resources and evaluations (LREC)"},{"key":"436_CR21","unstructured":"Gotoh Y, Renals S (2000) Sentence boundary detection in broadcast speech transcript. In: Proceedings of the international speech communication association (ISCA) workshop: automatic speech recognition: challenges for the new millennium ASR-2000"},{"key":"436_CR22","unstructured":"Huang Z, Harper M (2005) Speech and non-speech detection in meeting audio for transcription. In: Proceedings of NIST RT-05 workshop"},{"key":"436_CR23","unstructured":"Huang Z, Chen L, Harper M (2006) An open source prosodic feature extraction tool. In: Proceedings of the conference on language resources and evaluations (LREC)"},{"key":"436_CR24","unstructured":"Huang Z, Harper M, Wang W (2007) Mandarin part-of-speech tagging and discriminative reranking. In: Proceedings of the empirical methods in natural language processing (EMNLP), Prague, Czech"},{"key":"436_CR25","first-page":"119","volume-title":"Nonverbal communication","author":"A Kendon","year":"1974","unstructured":"Kendon A (1974) Movement coordination in social interaction: some examples described. In: Weitz S (ed) Nonverbal communication. Oxford University Press, New York, pp 119\u2013133"},{"key":"436_CR26","unstructured":"Lafferty J, McCallum A, Pereira F (2001) Conditional random field: probabilistic models for segmenting and labeling sequence data. In: Proceedings of the international conference on machine learning (ICML)"},{"key":"436_CR27","unstructured":"(LDC) LDC (2004) Meeting recording quick transcription guidelines, 1st edn. http:\/\/www.nist.gov\/speech\/test_beds\/mr_proj\/meeting_corpus_1\/documents\/pdf\/MeetingDataQTRSpec-V1.3.pdf"},{"key":"436_CR28","unstructured":"(LDC) LDC (2004) Simple MetaData annotation specification version 6.2, 6th edn. http:\/\/projects.ldc.upenn.edu\/MDE\/Guidelines\/SimpleMDE_V6.2.pdf"},{"key":"436_CR29","volume-title":"Testing statistical hypotheses","author":"EL Lehmann","year":"2005","unstructured":"Lehmann EL (2005) Testing statistical hypotheses, 3rd edn. Springer, New York","edition":"3"},{"key":"436_CR30","unstructured":"Liu Y (2004) Structural event detection for rich transcription of speech. Ph.D. thesis, Purdue University"},{"key":"436_CR31","unstructured":"Liu Y, Chawla N, Shriberg E, Stolcke A, Harper M (2003) Resampling techniques for sentence boundary detection: a case study in machine learning from imbalanced data for spoken language processing. Tech. rep., International Computer Science Institute"},{"key":"436_CR32","unstructured":"Liu Y, Stolcke A, Shriberg E, Harper M (2004) Comparing and combining generative and posterior probability models: some advances in sentence boundary detection in speech. In: Proceedings of the empirical methods in natural language processing (EMNLP)"},{"key":"436_CR33","doi-asserted-by":"crossref","unstructured":"Liu Y, Shriberg E, Stockle A, Harper M (2005) Comparing HMM, maximum entropy, and conditional random fields for disfluency detection. In: Proceedings of the international conference on speech, Lisbon","DOI":"10.3115\/1219840.1219896"},{"key":"436_CR34","unstructured":"Liu Y, Shriberg E, Stolcke A, Peskin B, Ang J, D H, Ostendorf M, Tomalin M, Woodland P, Harper M (2005) Structural metadata research in the EARS program. In: Proceedings of the international conference of acoustics, speech, and signal processing (ICASSP)"},{"key":"436_CR35","unstructured":"McCallum A (2005) Mallet: a machine learning toolkit for language. http:\/\/mallet.cs.umass.edu"},{"key":"436_CR36","volume-title":"Hand and mind: what gestures reveal about thought","author":"D McNeill","year":"1992","unstructured":"McNeill D (1992) Hand and mind: what gestures reveal about thought. Univ. Chicago Press, Chicago"},{"key":"436_CR37","volume-title":"Nonverbal communication","author":"A Mehrabian","year":"1972","unstructured":"Mehrabian A (1972) Nonverbal communication. Aidine-Atherton, Chicago"},{"key":"436_CR38","doi-asserted-by":"crossref","unstructured":"Morency LP, Quattoni A, Darrell T (2007) Latent-dynamic discriminative models for continuous gesture recognition. In: Proceedings of the IEEE computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2007.383299"},{"key":"436_CR39","doi-asserted-by":"crossref","unstructured":"Morgan N, Baron D, Bhagat S, Carvey H, Dhillon R, Edwards J, Gelbart D, Janin A, Krupski A, Peskin B, Pfau T, Shriberg E, Stolcke A, Wooters C (2003) Meetings about meetings: research at ICSI on speech in multiparty conversations. In: Proceedings of the international conference of acoustics, speech, and signal processing (ICASSP), vol\u00a04. Hong Kong, Hong Kong, pp 740\u2013743","DOI":"10.1109\/ICASSP.2003.1202749"},{"key":"436_CR40","doi-asserted-by":"crossref","unstructured":"Qu S, Chai J (2006) Salience modeling based on non-verbal modalities for spoken language understanding. In: Proceedings of the international conference on multimodal interface (ICMI), Banff, Canada","DOI":"10.1145\/1180995.1181036"},{"issue":"3","key":"436_CR41","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1145\/568513.568514","volume":"9","author":"F Quek","year":"2002","unstructured":"Quek F, McNeill D, Bryll R, Duncan S, Ma X, Kirbas C, McCullough KE, Ansari R (2002) Multimodal human discourse: gesture and speech. ACM Trans Comput-Hum Interact 9(3):171\u2013193","journal-title":"ACM Trans Comput-Hum Interact"},{"key":"436_CR42","unstructured":"Quek F et\u00a0al (2002) KDI: cross-model analysis signal and sense- data and computational resources for gesture, speech and gaze research. http:\/\/vislab.cs.vt.edu\/KDI"},{"key":"436_CR43","volume-title":"C4.5: programs for machine learning","author":"JR Quilan","year":"1993","unstructured":"Quilan JR (1993) C4.5: programs for machine learning. Morgan Kaufmann, San Francisco"},{"issue":"1","key":"436_CR44","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1109\/MASSP.1986.1165342","volume":"3","author":"LR Rabiner","year":"1986","unstructured":"Rabiner LR, Juang BH (1986) An introduction to hidden Markov models. IEEE ASSP Mag 3(1):4\u201316","journal-title":"IEEE ASSP Mag"},{"key":"436_CR45","unstructured":"Roark B, Liu Y, Harper M, Stewart R, Lease M, Snover M, Shafran I, Dorr B, Hale J, Krasnyanskaya A, Yung L (2006) Reranking for sentence boundary detection in conversational speech. In: Proceedings of the international conference of acoustics, speech, and signal processing (ICASSP)"},{"key":"436_CR46","doi-asserted-by":"crossref","unstructured":"Rose T, Quek F, Shi Y (2004) MacVissta: a system for multimodal analysis. In: Proceedings of the international conference on multimodal interface (ICMI)","DOI":"10.1145\/1027933.1027976"},{"issue":"1\u20132","key":"436_CR47","doi-asserted-by":"crossref","first-page":"127","DOI":"10.1016\/S0167-6393(00)00028-5","volume":"32","author":"E Shriberg","year":"2000","unstructured":"Shriberg E, Stolcke A, Hakkani-Tur D, Tur G (2000) Prosody-based automatic segmentation of speech into sentences and topics. Speech Commun 32(1\u20132):127\u2013154","journal-title":"Speech Commun"},{"key":"436_CR48","unstructured":"Stevensonm M, Gaizauskasm R (2000) Experiments on sentence boundary detedction. In: Proceedings of the conference of the North American chapter of the association for computational linguistics (NAACL)"},{"key":"436_CR49","doi-asserted-by":"crossref","unstructured":"Stockle A (2002) SRILM\u2014a extensible language modeling toolkit. In: Proceedings of the international conference on spoken language processing (ICSLP)","DOI":"10.21437\/ICSLP.2002-303"},{"key":"436_CR50","unstructured":"Strassel S (2003) Simple metadata annotation specification, 5th edn. Linguistic Data Consortium"},{"key":"436_CR51","unstructured":"Sundaram R, Ganapathiraju A, Hamaker J, Picone J (2001) ISIP 2000 conversational speech evaluation system. In: Proceedings of the speech transcription workshop, College Park, Maryland"},{"key":"436_CR52","volume-title":"Data mining: practical machine learning tools and techniques","author":"IH Witten","year":"2005","unstructured":"Witten IH, Frank E (2005) Data mining: practical machine learning tools and techniques. Morgan Kaufmann, San Francisco"},{"key":"436_CR53","doi-asserted-by":"crossref","unstructured":"Xiong Y, Quek F (2005) Meeting room configuration and multiple camera calibration in meeting analysis. In: Proceedings of the international conference on multimodal interface (ICMI), Trento, Italy","DOI":"10.1145\/1088463.1088474"},{"key":"436_CR54","unstructured":"Zhang L (2005) Maximum Entropy Modeling Toolkit for Python and C+\u2009+. http:\/\/homepages.inf.ed.ac.uk\/s0450736\/maxent_toolkit.html"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-009-0436-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11042-009-0436-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-009-0436-z","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T18:30:11Z","timestamp":1685385011000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11042-009-0436-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,1,23]]},"references-count":54,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2011,2]]}},"alternative-id":["436"],"URL":"https:\/\/doi.org\/10.1007\/s11042-009-0436-z","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2010,1,23]]}}}