{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T07:43:29Z","timestamp":1761896609244,"version":"3.41.0"},"publisher-location":"Cham","reference-count":82,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319570204"},{"type":"electronic","value":"9783319570211"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-57021-1_16","type":"book-chapter","created":{"date-parts":[[2017,7,19]],"date-time":"2017-07-19T04:43:44Z","timestamp":1500439424000},"page":"467-496","source":"Crossref","is-referenced-by-count":5,"title":["Multimodal Gesture Recognition via Multiple Hypotheses Rescoring"],"prefix":"10.1007","author":[{"given":"Vassilis","family":"Pitsikalis","sequence":"first","affiliation":[]},{"given":"Athanasios","family":"Katsamanis","sequence":"additional","affiliation":[]},{"given":"Stavros","family":"Theodorakis","sequence":"additional","affiliation":[]},{"given":"Petros","family":"Maragos","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,7,20]]},"reference":[{"key":"16_CR1","doi-asserted-by":"crossref","first-page":"323","DOI":"10.1007\/s10209-007-0104-x","volume":"6","author":"U Agris","year":"2008","unstructured":"U. Agris, J. Zieren, U. Canzler, B. Bauer, K.-F. Kraiss, Recent developments in visual sign language recognition. Univers. Access Inf. Soc. 6, 323\u2013362 (2008)","journal-title":"Univers. Access Inf. Soc."},{"issue":"9","key":"16_CR2","doi-asserted-by":"crossref","first-page":"1685","DOI":"10.1109\/TPAMI.2008.203","volume":"31","author":"J Alon","year":"2009","unstructured":"J. Alon, V. Athitsos, O. Yuan, S. Sclaroff, A unified framework for gesture recognition and spatiotemporal gesture segmentation. IEEE Trans. Pattern Anal. Mach. Intell. 31(9), 1685\u20131699 (2009)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"16_CR3","doi-asserted-by":"crossref","unstructured":"A.\u00a0Argyros, M.\u00a0Lourakis, Real time tracking of multiple skin-colored objects with a possibly moving camera, in Proceedings of the European Conference on Computer Vision, 2004","DOI":"10.1007\/978-3-540-24672-5_29"},{"key":"16_CR4","doi-asserted-by":"crossref","unstructured":"B. Bauer, K.F. Kraiss, Towards an automatic sign language recognition system using subunits. in Proceedings of International Gesture Workshop, vol. 2298, 2001, pp. 64\u201375","DOI":"10.1007\/3-540-47873-6_7"},{"key":"16_CR5","doi-asserted-by":"crossref","unstructured":"I.\u00a0Bayer, S.\u00a0Thierry, A multi modal approach to gesture recognition from audio and video data, in Proceedings of the 15th ACM International Conference on Multimodal Interaction (ACM, 2013), pp. 461\u2013466","DOI":"10.1145\/2522848.2532592"},{"issue":"2","key":"16_CR6","doi-asserted-by":"crossref","first-page":"178","DOI":"10.1016\/j.neuropsychologia.2005.05.007","volume":"44","author":"P Bernardis","year":"2006","unstructured":"P. Bernardis, M. Gentilucci, Speech and gesture share the same communication system. Neuropsychologia 44(2), 178\u2013190 (2006)","journal-title":"Neuropsychologia"},{"key":"16_CR7","unstructured":"N.D. Binh, E. Shuichi, T. Ejima, Real-time hand tracking and gesture recognition system, in Proceedings of International Conference on Graphics, Vision and Image Processing (GVIP), 2005, pp. 19\u201321"},{"issue":"3","key":"16_CR8","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1109\/34.910878","volume":"23","author":"AF Bobick","year":"2001","unstructured":"A.F. Bobick, J.W. Davis, The recognition of human movement using temporal templates. IEEE Trans. Pattern Anal. Mach. Intell. 23(3), 257\u2013267 (2001)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"16_CR9","doi-asserted-by":"crossref","unstructured":"R.\u00a0A. Bolt, \u201cPut-that-there\u201d: voice and gesture at the graphics interface, in Proceedings of the 7th Annual Conference on Computer Graphics and Interactive Techniques, vol.\u00a014 ( ACM, 1980)","DOI":"10.1145\/800250.807503"},{"key":"16_CR10","doi-asserted-by":"crossref","unstructured":"H.\u00a0Bourlard, S.\u00a0Dupont, Subband-based speech recognition, in Proceedings of the International Conference on Acoustics, Speech and Signal Processings, vol.\u00a02 (IEEE, Piscataway, 1997), pp. 1251\u20131254","DOI":"10.1109\/ICASSP.1997.596172"},{"key":"16_CR11","doi-asserted-by":"crossref","unstructured":"K.\u00a0Bousmalis, L.\u00a0Morency, M.\u00a0Pantic, Modeling hidden dynamics of multimodal cues for spontaneous agreement and disagreement recognition, in Proceedings of the International Conference on Automatic Face and Gesture Recognition (IEEE, Piscataway, 2011), pp. 746\u2013752","DOI":"10.1109\/FG.2011.5771341"},{"key":"16_CR12","doi-asserted-by":"crossref","unstructured":"P.\u00a0Buehler, M.\u00a0Everingham, A.\u00a0Zisserman, Learning sign language by watching TV (using weakly aligned subtitles), in Proceedings of the International Conference on Computer Vision and Pattern Recognition, 2009","DOI":"10.1109\/CVPR.2009.5206523"},{"key":"16_CR13","first-page":"620","volume":"1","author":"S Celebi","year":"2013","unstructured":"S. Celebi, A.S. Aydin, T.T. Temiz, T. Arici, Gesture recognition using skeleton data with weighted dynamic time warping. Comput. Vis. Theory Appl. 1, 620\u2013625 (2013)","journal-title":"Comput. Vis. Theory Appl."},{"issue":"8","key":"16_CR14","doi-asserted-by":"crossref","first-page":"745","DOI":"10.1016\/S0262-8856(03)00070-2","volume":"21","author":"F-S Chen","year":"2003","unstructured":"F.-S. Chen, C.-M. Fu, C.-L. Huang, Hand gesture recognition using a real-time tracking method and hidden markov models. Image Vis. Comput. 21(8), 745\u2013758 (2003)","journal-title":"Image Vis. Comput."},{"key":"16_CR15","doi-asserted-by":"crossref","unstructured":"X.\u00a0Chen, M.\u00a0Koskela, Online rgb-d gesture recognition with extreme learning machines, in Proceedings of the 15th ACM International Conference on Multimodal Interaction (ACM, 2013), pp. 467\u2013474","DOI":"10.1145\/2522848.2532591"},{"key":"16_CR16","doi-asserted-by":"crossref","unstructured":"Y.\u00a0L. Chow, R.\u00a0Schwartz, The n-best algorithm: An efficient procedure for finding top n sentence hypotheses, in Proceedings of the Workshop on Speech and Natural Language (Association for Computational Linguistics, 1989), pp. 199\u2013202","DOI":"10.3115\/1075434.1075467"},{"key":"16_CR17","unstructured":"S.\u00a0Conseil, S.\u00a0Bourennane, L.\u00a0Martin, Comparison of Fourier descriptors and Hu moments for hand posture recognition, in Proceedings of the European Conference on Signal Processing, 2007"},{"issue":"2","key":"16_CR18","doi-asserted-by":"crossref","first-page":"157","DOI":"10.1006\/cviu.2000.0837","volume":"78","author":"Y Cui","year":"2000","unstructured":"Y. Cui, J. Weng, Appearance-based hand sign recognition from intensity image sequences. Comput. Vis. Image Underst. 78(2), 157\u2013176 (2000)","journal-title":"Comput. Vis. Image Underst."},{"key":"16_CR19","doi-asserted-by":"crossref","unstructured":"N.\u00a0Dalal, B.\u00a0Triggs, Histogram of oriented gradients for human detection, in Proceedins International Conference on Computer Vision and Pattern Recognition, 2005","DOI":"10.1109\/CVPR.2005.177"},{"key":"16_CR20","unstructured":"W.\u00a0Du, J.\u00a0Piater, Hand modeling and tracking for video-based sign language recognition by robust principal component analysis, in Proceedings of the ECCV Workshop on Sign, Gesture and Activity, September 2010"},{"key":"16_CR21","doi-asserted-by":"crossref","unstructured":"S.\u00a0Escalera, J.\u00a0Gonz\u00e0lez, X.\u00a0Bar\u00f3, M.\u00a0Reyes, I.\u00a0Guyon, V.\u00a0Athitsos, H.\u00a0Escalante, L.\u00a0Sigal, A.\u00a0Argyros, C.\u00a0Sminchisescu, R.\u00a0Bowden, S.\u00a0Sclaroff, Chalearn multi-modal gesture recognition 2013: grand challenge and workshop summary, in Proceedings of the 15th ACM on International Conference on Multimodal Interaction (ACM, 2013a), pp. 365\u2013368","DOI":"10.1145\/2522848.2532597"},{"key":"16_CR22","doi-asserted-by":"crossref","unstructured":"S.\u00a0Escalera, J.\u00a0Gonzlez, X.\u00a0Bar, M.\u00a0Reyes, O.\u00a0Lopes, I.\u00a0Guyon, V.\u00a0Athitsos, H.J. Escalante. Multi-modal Gesture Recognition Challenge 2013: Dataset and Results, in 15th ACM International Conference on Multimodal Interaction (ICMI), ChaLearn Challenge and Workshop on Multi-modal Gesture Recognition (ACM, 2013b)","DOI":"10.1145\/2522848.2532597"},{"key":"16_CR23","doi-asserted-by":"crossref","unstructured":"J.\u00a0Foote, An overview of audio information retrieval. Multimedia Syst. 7(1):2\u201310 (1999), http:\/\/link.springer.com\/article\/10.1007\/s005300050106","DOI":"10.1007\/s005300050106"},{"key":"16_CR24","doi-asserted-by":"crossref","unstructured":"L.\u00a0Gillick, S.J. Cox, Some statistical issues in the comparison of speech recognition algorithms, in Proceedings of the International Conference on Acoustics, Speech and Signal Processing, vol. 1, May 1989, pp. 532\u2013535","DOI":"10.1109\/ICASSP.1989.266481"},{"key":"16_CR25","doi-asserted-by":"crossref","unstructured":"H.\u00a0Glotin, D.\u00a0Vergyr, C.\u00a0Neti, G.\u00a0Potamianos, J.\u00a0Luettin, Weighting schemes for audio-visual fusion in speech recognition, in Proceedings of the International Conference on Acoustics, Speech and Signal Processing, vol.\u00a01 (IEEE, Piscataway, 2001), pp. 173\u2013176","DOI":"10.1109\/ICASSP.2001.940795"},{"issue":"8","key":"16_CR26","doi-asserted-by":"crossref","first-page":"1845","DOI":"10.1162\/jocn.2010.21462","volume":"23","author":"B Habets","year":"2011","unstructured":"B. Habets, S. Kita, Z. Shao, A. \u00d6zyurek, P. Hagoort, The role of synchrony and ambiguity in speech-gesture integration during comprehension. J. Cogn. Neurosci. 23(8), 1845\u20131854 (2011)","journal-title":"J. Cogn. Neurosci."},{"key":"16_CR27","doi-asserted-by":"crossref","first-page":"623","DOI":"10.1016\/j.patrec.2008.12.010","volume":"30","author":"J Han","year":"2009","unstructured":"J. Han, G. Awad, A. Sutherland, Modelling and segmenting subunits for sign language recognition based on hand motion analysis. Pattern Recognit. Lett. 30, 623\u2013633 (2009)","journal-title":"Pattern Recognit. Lett."},{"issue":"4","key":"16_CR28","doi-asserted-by":"crossref","first-page":"1738","DOI":"10.1121\/1.399423","volume":"87","author":"H Hermansky","year":"1990","unstructured":"H. Hermansky, Perceptual linear predictive (PLP) analysis of speech. J. Acoust. Soc. Am. 87(4), 1738\u20131752 (1990)","journal-title":"J. Acoust. Soc. Am."},{"key":"16_CR29","doi-asserted-by":"crossref","unstructured":"A. Hern\u00e1ndez-Vela, M.\u00c1. Bautista, X. Perez-Sala, V. Ponce-L\u00f3pez, S. Escalera, X. Bar\u00f3, O. Pujol, C. Angulo, Probability-based dynamic time warping and bag-of-visual-and-depth-words for human gesture recognition in rgb-d. Pattern Recognit. Lett. (2013)","DOI":"10.1016\/j.patrec.2013.09.009"},{"issue":"5","key":"16_CR30","doi-asserted-by":"crossref","first-page":"243","DOI":"10.1007\/s001380050144","volume":"12","author":"C-L Huang","year":"2001","unstructured":"C.-L. Huang, S.-H. Jeng, A model-based hand gesture recognition system. Mach. Vis. Appl. 12(5), 243\u2013258 (2001)","journal-title":"Mach. Vis. Appl."},{"issue":"1","key":"16_CR31","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1023\/A:1008078328650","volume":"29","author":"M Isard","year":"1998","unstructured":"M. Isard, A. Blake, Condensation-conditional density propagation for visual tracking. Int. J. Comput. Vis. 29(1), 5\u201328 (1998)","journal-title":"Int. J. Comput. Vis."},{"issue":"6708","key":"16_CR32","doi-asserted-by":"crossref","first-page":"228","DOI":"10.1038\/24300","volume":"396","author":"JM Iverson","year":"1998","unstructured":"J.M. Iverson, S. Goldin-Meadow, Why people gesture when they speak. Nature 396(6708), 228 (1998)","journal-title":"Nature"},{"issue":"1","key":"16_CR33","doi-asserted-by":"crossref","first-page":"116","DOI":"10.1016\/j.cviu.2006.10.019","volume":"108","author":"A Jaimes","year":"2007","unstructured":"A. Jaimes, N. Sebe, Multimodal human-computer interaction: a survey. Comput. Vis. Image Underst. 108(1), 116\u2013134 (2007)","journal-title":"Comput. Vis. Image Underst."},{"issue":"2","key":"16_CR34","doi-asserted-by":"crossref","first-page":"260","DOI":"10.1177\/0956797609357327","volume":"21","author":"SD Kelly","year":"2010","unstructured":"S.D. Kelly, A. \u00d6zy\u00fcrek, E. Maris, Two sides of the same coin speech and gesture mutually interact to enhance comprehension. Psychol. Sci. 21(2), 260\u2013267 (2010)","journal-title":"Psychol. Sci."},{"key":"16_CR35","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511807572","volume-title":"Gesture: Visible Action as Utterance","author":"A Kendon","year":"2004","unstructured":"A. Kendon, Gesture: Visible Action as Utterance (Cambridge University Press, New York, 2004)"},{"key":"16_CR36","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/s11265-008-0292-5","volume":"59","author":"W Kong","year":"2010","unstructured":"W. Kong, S. Ranganath, Sign language phoneme transcription with rule-based hand trajectory segmentation. J. Signal Process. Syst. 59, 211\u2013222 (2010)","journal-title":"J. Signal Process. Syst."},{"key":"16_CR37","doi-asserted-by":"crossref","unstructured":"I.\u00a0Laptev, M.\u00a0Marszalek, C.\u00a0Schmid, B. Rozenfeld, Learning realistic human actions from movies, in Proceedings of the International Conference on Computer Vision and Pattern Recognition (IEEE, Piscataway, 2008), pp. 1\u20138","DOI":"10.1109\/CVPR.2008.4587756"},{"issue":"10","key":"16_CR38","doi-asserted-by":"crossref","first-page":"961","DOI":"10.1109\/34.799904","volume":"21","author":"H-K Lee","year":"1999","unstructured":"H.-K. Lee, J.-H. Kim, An HMM-based threshold model approach for gesture recognition. IEEE Trans. Pattern Anal. Mach. Intell. 21(10), 961\u2013973 (1999)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"11","key":"16_CR39","doi-asserted-by":"crossref","first-page":"3012","DOI":"10.1016\/j.patcog.2007.02.010","volume":"40","author":"J Li","year":"2007","unstructured":"J. Li, N.M. Allinson, Simultaneous gesture segmentation and recognition based on forward spotting accumulative hmms. Pattern Recognit. 40(11), 3012\u20133026 (2007)","journal-title":"Pattern Recognit."},{"issue":"10","key":"16_CR40","doi-asserted-by":"crossref","first-page":"1771","DOI":"10.1016\/j.neucom.2007.11.032","volume":"71","author":"J Li","year":"2008","unstructured":"J. Li, N.M. Allinson, A comprehensive review of current local features for computer vision. Neurocomputing 71(10), 1771\u20131787 (2008)","journal-title":"Neurocomputing"},{"key":"16_CR41","doi-asserted-by":"crossref","unstructured":"D.\u00a0G. Lowe, Object recognition from local scale-invariant features, in Proceedings of the International Conference on Computer Vision, 1999, pp. 1150\u20131157","DOI":"10.1109\/ICCV.1999.790410"},{"key":"16_CR42","doi-asserted-by":"crossref","unstructured":"P.\u00a0Maragos, P.\u00a0Gros, A.\u00a0Katsamanis, G. Papandreou, Cross-modal integration for performance improving in multimedia: a review, in Multimodal Processing and Interaction: Audio, Video, Text ed. by P.\u00a0Maragos, A.\u00a0Potamianos, and P.\u00a0Gros, chapter\u00a01 (Springer, New York, 2008), pp. 3\u201348","DOI":"10.1007\/978-0-387-76316-3_1"},{"key":"16_CR43","volume-title":"Hand and Mind: What Gestures Reveal About Thought","author":"D McNeill","year":"1992","unstructured":"D. McNeill, Hand and Mind: What Gestures Reveal About Thought (University of Chicago Press, Chicago, 1992)"},{"issue":"1","key":"16_CR44","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1186\/1687-4722-2014-2","volume":"2014","author":"M Miki","year":"2014","unstructured":"M. Miki, N. Kitaoka, C. Miyajima, T. Nishino, K. Takeda, Improvement of multimodal gesture and speech recognition performance using time intervals between gestures and accompanying speech. EURASIP J. Audio Speech Music Process. 2014(1), 17 (2014). doi: 10.1186\/1687-4722-2014-2","journal-title":"EURASIP J. Audio Speech Music Process."},{"key":"16_CR45","volume-title":"Gestures: Their Origins and Distribution","author":"d Morris","year":"1979","unstructured":"d Morris, p Collett, p Marsh, M. O\u2019Shaughnessy, Gestures: Their Origins and Distribution (Stein and Day, New York, 1979)"},{"key":"16_CR46","doi-asserted-by":"crossref","unstructured":"Y.\u00a0Nam, K.\u00a0Wohn, Recognition of space-time hand-gestures using hidden Markov model, in ACM Symposium on Virtual Reality Software and Technology, 1996, pp. 51\u201358","DOI":"10.1145\/3304181.3304193"},{"key":"16_CR47","doi-asserted-by":"crossref","unstructured":"K.\u00a0Nandakumar, K.\u00a0W. Wan, S.\u00a0Chan, W.\u00a0Ng, J.\u00a0G. Wang, and W.\u00a0Y. Yau. A multi-modal gesture recognition system using audio, video, and skeletal joint data. in Proceedings of the 15th ACM Int\u2019l Conf. on Multimodal Interaction (ACM, 2013), pages 475\u2013482","DOI":"10.1145\/2522848.2532593"},{"key":"16_CR48","doi-asserted-by":"crossref","unstructured":"N.\u00a0Neverova, C.\u00a0Wolf, G.\u00a0Paci, G.\u00a0Sommavilla, G.\u00a0Taylor, F.\u00a0Nebout, A multi-scale approach to gesture detection and recognition, in Proceedings of the IEEE International Conference on Computer Vision Workshop, 2013, pp. 484\u2013491","DOI":"10.1109\/ICCVW.2013.69"},{"key":"16_CR49","unstructured":"E.-J. Ong, R.\u00a0Bowden, A boosted classifier tree for hand shape detection, in Proceedings of the International Conference on Automation Face Gest Recognition (IEEE, Piscataway, 2004), pp. 889\u2013894"},{"key":"16_CR50","doi-asserted-by":"crossref","unstructured":"M.\u00a0Ostendorf, A.\u00a0Kannan, S.\u00a0Austin, O.\u00a0Kimball, R.\u00a0M. Schwartz, J.\u00a0R. Rohlicek, Integration of diverse recognition methodologies through reevaluation of N-best sentence hypotheses, in HLT, 1991","DOI":"10.3115\/112405.112416"},{"issue":"3","key":"16_CR51","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1145\/330534.330538","volume":"43","author":"S Oviatt","year":"2000","unstructured":"S. Oviatt, P. Cohen, Perceptual user interfaces: multimodal interfaces that process what comes naturally. Commun. ACM 43(3), 45\u201353 (2000)","journal-title":"Commun. ACM"},{"issue":"3","key":"16_CR52","doi-asserted-by":"crossref","first-page":"423","DOI":"10.1109\/TASL.2008.2011515","volume":"17","author":"G Papandreou","year":"2009","unstructured":"G. Papandreou, A. Katsamanis, V. Pitsikalis, P. Maragos, Adaptive multimodal fusion by uncertainty compensation with application to audiovisual speech recognition. IEEE Trans. Audio Speech Lang. Process. 17(3), 423\u2013435 (2009)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"16_CR53","doi-asserted-by":"crossref","unstructured":"V.\u00a0Pitsikalis, S.\u00a0Theodorakis, C.\u00a0Vogler, P.\u00a0Maragos, Advances in phonetics-based sub-unit modeling for transcription alignment and sign language recognition, in IEEE CVPR Workshop on Gesture Recognition, 2011","DOI":"10.1109\/CVPRW.2011.5981681"},{"key":"16_CR54","unstructured":"I. Poddar, Y. Sethi, E. Ozyildiz, R. Sharma, Toward natural gesture\/speech HCI: A case study of weather narration, in Proceedings of the Workshop on Perceptual User Interfaces, 1998"},{"key":"16_CR55","doi-asserted-by":"crossref","unstructured":"V.\u00a0Ponce-L\u00f3pez, S.\u00a0Escalera, X.\u00a0Bar\u00f3, Multi-modal social signal analysis for predicting agreement in conversation settings, in Proceedings of the 15th ACM International Conference on Multimodal Interaction (ACM, 2013), pp. 495\u2013502","DOI":"10.1145\/2522848.2532594"},{"key":"16_CR56","first-page":"23","volume":"22","author":"G Potamianos","year":"2004","unstructured":"G. Potamianos, C. Neti, J. Luettin, I. Matthews, Audio-visual automatic speech recognition: an overview. Issues Vis. Audio Vis Speech Process. 22, 23 (2004)","journal-title":"Issues Vis. Audio Vis Speech Process."},{"key":"16_CR57","volume-title":"Fundamentals of Speech Recognition","author":"LR Rabiner","year":"1993","unstructured":"L.R. Rabiner, B.H. Juang, Fundamentals of Speech Recognition (Prentice Hall, Upper Saddle River, 1993)"},{"key":"16_CR58","doi-asserted-by":"crossref","unstructured":"Z.\u00a0Ren, J.\u00a0Yuan, Z.\u00a0Zhang, Robust hand gesture recognition based on finger-earth mover\u2019s distance with a commodity depth camera, in Proceedings of the 19th ACM International Conference on Multimedia (ACM, 2011), pp. 1093\u20131096","DOI":"10.1145\/2072298.2071946"},{"key":"16_CR59","doi-asserted-by":"crossref","unstructured":"R.\u00a0C. Rose, Discriminant wordspotting techniques for rejecting non-vocabulary utterances in unconstrained speech, in Proceedings of the International Conference on Acoustics, Speech and Signal Processing, vol.\u00a02 (IEEE, Piscataway, 1992), pp. 105\u2013108, http:\/\/ieeexplore.ieee.org\/xpls\/abs_all.jsp?arnumber=226109","DOI":"10.1109\/ICASSP.1992.226109"},{"key":"16_CR60","doi-asserted-by":"crossref","unstructured":"R.\u00a0C. Rose, D.\u00a0B. Paul, A hidden Markov model based keyword recognition system, in Proceedings of the International Conference on Acoustics, Speech and Signal Processing, 1990, pp. 129\u2013132, http:\/\/ieeexplore.ieee.org\/xpls\/abs_all.jsp?arnumber=115555","DOI":"10.1109\/ICASSP.1990.115555"},{"issue":"1","key":"16_CR61","first-page":"1627","volume":"14","author":"A Roussos","year":"2013","unstructured":"A. Roussos, S. Theodorakis, V. Pitsikalis, P. Maragos, Dynamic affine-invariant shape-appearance handshape features and classification in sign language videos. J. Mach. Learn. Res. 14(1), 1627\u20131663 (2013)","journal-title":"J. Mach. Learn. Res."},{"key":"16_CR62","doi-asserted-by":"crossref","unstructured":"S.\u00a0Ruffieux, D.\u00a0Lalanne, E.\u00a0Mugellini, ChAirGest: a challenge for multimodal mid-air gesture recognition for close HCI, in Proceedings of the 15th ACM International Conference on Multimodal Interaction, ICMI \u201913 (ACM, New York, NY, USA, 2013), pp. 483\u2013488","DOI":"10.1145\/2522848.2532590"},{"key":"16_CR63","doi-asserted-by":"crossref","unstructured":"S.\u00a0Ruffieux, D.\u00a0Lalanne, E.\u00a0Mugellini, O.\u00a0A. Khaled, A survey of datasets for human gesture recognition, in Human-Computer Interaction. Advanced Interaction Modalities and Techniques (Springer, 2014), pp. 337\u2013348","DOI":"10.1007\/978-3-319-07230-2_33"},{"issue":"9","key":"16_CR64","doi-asserted-by":"crossref","first-page":"1327","DOI":"10.1109\/JPROC.2003.817145","volume":"91","author":"R Sharma","year":"2003","unstructured":"R. Sharma, M. Yeasin, N. Krahnstoever, I. Rauschert, G. Cai, I. Brewer, A.M. MacEachren, K. Sengupta, Speech-gesture driven multimodal interfaces for crisis management. Proc. IEEE 91(9), 1327\u20131354 (2003)","journal-title":"Proc. IEEE"},{"issue":"4","key":"16_CR65","doi-asserted-by":"crossref","first-page":"505","DOI":"10.1016\/S0959-4388(00)00241-5","volume":"11","author":"S Shimojo","year":"2001","unstructured":"S. Shimojo, L. Shams, Sensory modalities are not separate modalities: plasticity and interactions. Curr. Opin. Neurobiol. 11(4), 505\u2013509 (2001)","journal-title":"Curr. Opin. Neurobiol."},{"issue":"1","key":"16_CR66","doi-asserted-by":"crossref","first-page":"116","DOI":"10.1145\/2398356.2398381","volume":"56","author":"J Shotton","year":"2013","unstructured":"J. Shotton, T. Sharp, A. Kipman, A. Fitzgibbon, M. Finocchio, A. Blake, M. Cook, R. Moore, Real-time human pose recognition in parts from single depth images. Commun. ACM 56(1), 116\u2013124 (2013)","journal-title":"Commun. ACM"},{"key":"16_CR67","doi-asserted-by":"crossref","unstructured":"R.\u00a0Shwartz, S.\u00a0Austin, A comparison of several approximate algorithms for finding multiple N-Best sentence hypotheses, in Proceedings of the International Conference on Acoustics, Speech and Signal Processing, 1991","DOI":"10.1109\/ICASSP.1991.150436"},{"key":"16_CR68","doi-asserted-by":"crossref","unstructured":"Y.\u00a0C. Song, H.\u00a0Kautz, J.\u00a0Allen, M.\u00a0Swift, Y.\u00a0Li, J.\u00a0Luo, C.\u00a0Zhang, A markov logic framework for recognizing complex events from multimodal data, in Proceedings of the 15th ACM International Conference on Multimodal Interaction (ACM, 2013), pp. 141\u2013148","DOI":"10.1145\/2522848.2522883"},{"issue":"12","key":"16_CR69","doi-asserted-by":"crossref","first-page":"1371","DOI":"10.1109\/34.735811","volume":"20","author":"T Starner","year":"1998","unstructured":"T. Starner, J. Weaver, A. Pentland, Real-time american sign language recognition using desk and wearable computer based video. IEEE Trans. Pattern Anal. Mach. Intell. 20(12), 1371\u20131375 (1998)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"16_CR70","doi-asserted-by":"crossref","unstructured":"L.\u00a0N. Tan, B.\u00a0J. Borgstrom, A.\u00a0Alwan, Voice activity detection using harmonic frequency components in likelihood ratio test, in Proceedings of the International Conference on Acoustics, Speech and Signal Processing (IEEE, Piscataway, 2010), pp. 4466\u20134469","DOI":"10.1109\/ICASSP.2010.5495611"},{"key":"16_CR71","unstructured":"N. Tanibata, N. Shimada, Y. Shirai, Extraction of hand features for recognition of sign language words, in Proceedings of the International Conference on Vision, Interface, 2002, pp. 391\u2013398"},{"issue":"8","key":"16_CR72","first-page":"533549","volume":"32","author":"S Theodorakis","year":"2014","unstructured":"S. Theodorakis, V. Pitsikalis, P. Maragos, Dynamic-static unsupervised sequentiality, statistical subunits and lexicon for sign language recognition. Imave Vis. Comput. 32(8), 533549 (2014)","journal-title":"Imave Vis. Comput."},{"key":"16_CR73","doi-asserted-by":"crossref","first-page":"189","DOI":"10.1016\/j.patrec.2013.07.003","volume":"36","author":"M Turk","year":"2014","unstructured":"M. Turk, Multimodal interaction: a review. Pattern. Recognit. Lett. 36, 189\u2013195 (2014)","journal-title":"Pattern. Recognit. Lett."},{"key":"16_CR74","doi-asserted-by":"crossref","first-page":"358","DOI":"10.1006\/cviu.2000.0895","volume":"81","author":"C Vogler","year":"2001","unstructured":"C. Vogler, D. Metaxas, A framework for recognizing the simultaneous aspects of american sign language. Comput. Vis. Image Underst. 81, 358 (2001)","journal-title":"Comput. Vis. Image Underst."},{"key":"16_CR75","doi-asserted-by":"crossref","unstructured":"S.\u00a0B. Wang, A.\u00a0Quattoni, L.\u00a0Morency, D.\u00a0Demirdjian, T.\u00a0Darrell, Hidden conditional random fields for gesture recognition, in Proceedings of the International Conference on Computer Vision and Pattern Recognition, vol.\u00a02 (IEEE, Piscataway, 2006), pp. 1521\u20131527","DOI":"10.1109\/CVPR.2006.132"},{"key":"16_CR76","doi-asserted-by":"crossref","unstructured":"D.\u00a0Weimer, S.\u00a0Ganapathy, A synthetic visual environment with hand gesturing and voice input, in ACM SIGCHI Bulletin, vol.\u00a020 (ACM, 1989), pp. 235\u2013240","DOI":"10.1145\/67449.67495"},{"key":"16_CR77","doi-asserted-by":"crossref","unstructured":"L.\u00a0D Wilcox, M.\u00a0Bush, Training and search algorithms for an interactive wordspotting system, in Proceedings of the International Conference on Acoustics, Speech and Signal Processing, vol.\u00a02 (IEEE, Piscataway, 1992), pp. 97\u2013100","DOI":"10.1109\/ICASSP.1992.226111"},{"issue":"11","key":"16_CR78","doi-asserted-by":"crossref","first-page":"1870","DOI":"10.1109\/29.103088","volume":"38","author":"J Wilpon","year":"1990","unstructured":"J. Wilpon, L.R. Rabiner, C.-H. Lee, E.R. Goldman, Automatic recognition of keywords in unconstrained speech using hidden Markov models. IEEE Trans. Acoustics Speech Signal Process. 38(11), 1870\u20131878 (1990)","journal-title":"IEEE Trans. Acoustics Speech Signal Process."},{"key":"16_CR79","doi-asserted-by":"crossref","first-page":"884","DOI":"10.1109\/34.790429","volume":"21","author":"A Wilson","year":"1999","unstructured":"A. Wilson, A. Bobick, Parametric hidden markov models for gesture recognition. IEEE Trans. Pattern Anal. Mach. Intell. 21, 884\u2013900 (1999)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"16_CR80","doi-asserted-by":"crossref","unstructured":"J.\u00a0Wu, J.\u00a0Cheng, C.\u00a0Zhao, H.\u00a0Lu. Fusing multi-modal features for gesture recognition, in Proceedings of the 15th ACM International Conference on Multimodal Interaction (ACM, 2013), pp. 453\u2013460","DOI":"10.1145\/2522848.2532589"},{"issue":"8","key":"16_CR81","doi-asserted-by":"crossref","first-page":"1061","DOI":"10.1109\/TPAMI.2002.1023803","volume":"24","author":"M-H Yang","year":"2002","unstructured":"M.-H. Yang, N. Ahuja, M. Tabb, Extraction of 2d motion trajectories and its application to hand gesture recognition. IEEE Trans. Pattern Anal. Mach. Intell. 24(8), 1061\u20131074 (2002)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"16_CR82","volume-title":"The HTK Book","author":"S Young","year":"2002","unstructured":"S. Young, G. Evermann, T. Hain, D. Kershaw, G. Moore, J. Odell, D. Ollason, D. Povey, V. Valtchev, P. Woodland, The HTK Book (Entropic Cambridge Research Laboratory, Cambridge, 2002)"}],"container-title":["The Springer Series on Challenges in Machine Learning","Gesture Recognition"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-57021-1_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,24]],"date-time":"2025-06-24T15:08:43Z","timestamp":1750777723000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-57021-1_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319570204","9783319570211"],"references-count":82,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-57021-1_16","relation":{},"ISSN":["2520-131X","2520-1328"],"issn-type":[{"type":"print","value":"2520-131X"},{"type":"electronic","value":"2520-1328"}],"subject":[],"published":{"date-parts":[[2017]]}}}