{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:32:45Z","timestamp":1767339165594,"version":"3.41.2"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[1998,8,1]],"date-time":"1998-08-01T00:00:00Z","timestamp":901929600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[1998,8,1]],"date-time":"1998-08-01T00:00:00Z","timestamp":901929600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Learning"],"published-print":{"date-parts":[[1998,8]]},"DOI":"10.1023\/a:1007468413059","type":"journal-article","created":{"date-parts":[[2002,12,22]],"date-time":"2002-12-22T04:48:21Z","timestamp":1040532501000},"page":"85-100","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["Robust Sensor Fusion: Analysis and Application to Audio Visual Speech Recognition"],"prefix":"10.1007","volume":"32","author":[{"given":"Javier R.","family":"Movellan","sequence":"first","affiliation":[]},{"given":"Paul","family":"Mineiro","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"168699_CR1","doi-asserted-by":"crossref","unstructured":"Adjondani, A. & Benoit, C. (1995). Audio-visual speech recognition compared across two architectures. Proceedings of the Eurospeech'95 Conference (pp. 1563\u20131566). Madrid, Spain.","DOI":"10.21437\/Eurospeech.1995-382"},{"key":"168699_CR2","volume-title":"Speechreading by Humans and Machines: Models, Systems, and Applications","author":"A. Adjondani","year":"1996","unstructured":"Adjondani, A. & Benoit, C. (1996). On the Integration of Auditory and Visual Parameters in an HMM-based ASR. In D.G. Stork & M.E. Hennecke (eds.), Speechreading by Humans and Machines: Models, Systems, and Applications. New York: NATO\/Springer-Verlag."},{"key":"168699_CR3","unstructured":"Bernstein, L. & Benoit, C. (1996). For Speech Perception Three Senses are Better than One. Proceedings of the 4th International Conference on Spoken Language Processing, Philadelphia, PA."},{"key":"168699_CR4","doi-asserted-by":"crossref","first-page":"383","DOI":"10.2307\/2982063","volume":"143","author":"G.E.P. Box","year":"1980","unstructured":"Box, G.E.P. (1980). Sampling and Bayes inference in scientific modeling. J. Roy. Stat. Soc., A., 143, 383\u2013430.","journal-title":"J. Roy. Stat. Soc., A."},{"key":"168699_CR5","first-page":"557","volume-title":"Proc. Int. Conf. on Acoust., Speech, and Signal Processing","author":"C. Bregler","year":"1993","unstructured":"Bregler, C., Hild, H., Manke, S., & Waibel, A. (1993). Improving Connected Letter Recognition by Lipreading. Proc. Int. Conf. on Acoust., Speech, and Signal Processing (pp. 557\u2013560), Minneapolis. IEEE."},{"key":"168699_CR6","doi-asserted-by":"crossref","unstructured":"Bregler, C., Manke, S., & Waibel, A. (1993). Bimodal Sensor Integration on the Example of Speech-Reading. Proceedings of the IEEE International Conference on Neural Networks (pp. 667\u2013671).","DOI":"10.1109\/ICNN.1993.298634"},{"key":"168699_CR7","doi-asserted-by":"crossref","unstructured":"Bregler, C., Omohundro, S.M., & Konig, Y. (1994). A Hybrid Approach to Bimodal Speech Recognition. 28th Annual Asilomar Conference on Signals, Systems, and Computers (pp. 556\u2013560), Pacific Grove, CA.","DOI":"10.1109\/ACSSC.1994.471514"},{"key":"168699_CR8","volume-title":"Attention and performance XVI: Information integration in perception and communication","author":"H.H. B\u00fclthoff","year":"1996","unstructured":"B\u00fclthoff, H.H. & Yuille, A.L. (1996). A Bayesian framework for the integration of visual modules. In T. Inui & J.L. McClelland (eds.), Attention and performance XVI: Information integration in perception and communication. Cambridge, MA: MIT Press."},{"key":"168699_CR9","unstructured":"Chadderdon, G. & Movellan, J.R. (1995). Testing for Channel Independence in Bimodal Speech Recognition. Proceedings of 2nd Joint Symposium on Neural Computation (pp. 84\u201390). University of California San Diego and California Institute of Technology."},{"key":"168699_CR10","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4757-2076-1","volume-title":"Data Fusion for Sensory Information Processing Systems","author":"J.J. Clark","year":"1990","unstructured":"Clark, J.J. & Yuille, A.L. (1990). Data Fusion for Sensory Information Processing Systems. Boston: Kluwer Academic Publishers."},{"key":"168699_CR11","doi-asserted-by":"crossref","unstructured":"Cosi, P., Magno Caldognetto, E., Vagges, K., Mian, G.A., & Contolini, M. (1994). Bimodal recognition experiments with recurrent neural networks. Proc. Int. Conf. on Acoust., Speech, and Signal Processing (pp. 553\u2013556).","DOI":"10.1109\/ICASSP.1994.389596"},{"key":"168699_CR12","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"A.P. Dempster","year":"1977","unstructured":"Dempster, A.P., Laird, N.M., & Rubin, D.B. (1977). Maximum likelihood from incomplete data via the EM algorithm. Journal of the Royal Statistical Society, 39, 1\u201338.","journal-title":"Journal of the Royal Statistical Society"},{"key":"168699_CR13","first-page":"19","volume":"2","author":"V. de Sa","year":"1994","unstructured":"de Sa, V. (1994). Combining Uni-Modal Classifiers to Improve Learning. In Integration of Elementary Functions into Complex Behavior, volume 2, 19\u201329.","journal-title":"Integration of Elementary Functions into Complex Behavior"},{"key":"168699_CR14","doi-asserted-by":"crossref","DOI":"10.1137\/1.9781611970319","volume-title":"The jacknife, the bootstrap and other resampling plans.","author":"A. Efron","year":"1982","unstructured":"Efron, A. (1982). The jacknife, the bootstrap and other resampling plans. SIAM, Philadelphia, PA."},{"key":"168699_CR15","volume-title":"Advances in Neural Information Processing Systems","author":"M.S. Gray","year":"1997","unstructured":"Gray, M.S., Movellan, J.R., & Sejnowski, T. (1997). Dynamic features for visual speechreading: A systematic comparison. In Advances in Neural Information Processing Systems, 9. Cambridge, MA: MIT Press."},{"key":"168699_CR16","volume-title":"Speechreading by Humans and Machines: Models, Systems, and Applications","author":"M.E. Hennecke","year":"1996","unstructured":"Hennecke, M.E., Stork, D.G., & Ventakesh Prasad, K. (1996). Visionary speech: looking ahead to practical speech reading systems. In D.G. Stork & M.E. Hennecke (eds.), Speechreading by Humans and Machines: Models, Systems, and Applications. New York: NATO\/Springer-Verlag."},{"key":"168699_CR17","first-page":"578","volume-title":"28th Asilomar Conference on Signals, Systems, and Computers","author":"M.E. Hennecke","year":"1994","unstructured":"Hennecke, M.E., Venkatesh Prasad, K., & Stork, D.G. (1994). Using Deformable Templates to InferVisual Speech Dynamics. 28th Asilomar Conference on Signals, Systems, and Computers (pp. 578\u2013582). Pacific Grove, CA: IEEE Computer Society Press."},{"key":"168699_CR18","volume-title":"Understanding robust and exploratory data analysis","author":"D.C. Hoglin","year":"1983","unstructured":"Hoglin, D.C., Mosteller, F., & Tukey, J.W. (1983). Understanding robust and exploratory data analysis. New York: John Wiley."},{"key":"168699_CR19","doi-asserted-by":"crossref","first-page":"1138","DOI":"10.1126\/science.7146899","volume":"218","author":"P.K. Kuhl","year":"1982","unstructured":"Kuhl, P.K. & Meltzoff, A.M. (1982). The bimodal perception of speech in infancy. Science, 218, 1138\u20131141.","journal-title":"Science"},{"key":"168699_CR20","volume-title":"Maximum entropy and Bayesian methods, Santa Barbara","author":"D.J.C. MacKay","year":"1993","unstructured":"MacKay, D.J.C. (1996). Hyperparameters: Optimise or interate out?. In G. Heidbreder (ed.), Maximum entropy and Bayesian methods, Santa Barbara 1993. Dordrecht: Kluwer."},{"key":"168699_CR21","volume-title":"Speech Perception by Ear and Eye: A Paradigm for Psychological Inquiry","author":"D.W. Massaro","year":"1987","unstructured":"Massaro, D.W. (1987). Speech Perception by Ear and Eye: A Paradigm for Psychological Inquiry. Hillsdale, NJ: Lawrence Erlbaum Associates."},{"key":"168699_CR22","doi-asserted-by":"crossref","unstructured":"McGurk, H. & MacDonald, J. (1976). Hearing Lips and Seeing Voices. Nature, 264, 746\u2013748.","DOI":"10.1038\/264746a0"},{"key":"168699_CR23","volume-title":"Advances in neural information processing systems","author":"J.R. Movellan","year":"1995","unstructured":"Movellan, J.R. (1995). Visual speech recognition with stochastic neural networks. In G. Tesauro, D. Touretzky, & T. Leen (eds.), Advances in neural information processing systems. Cambridge, MA: MIT Press."},{"key":"168699_CR24","volume-title":"Speechreading by Humans and Machines: Models, Systems, and Applications","author":"J.R. Movellan","year":"1996","unstructured":"Movellan, J.R. & Chadderdon, G. (1996). Channel Separability in the Audio Visual Integration of Speech: A Bayesian Approach. In D.G. Stork & M.E. Hennecke (eds.), Speechreading by Humans and Machines: Models, Systems, and Applications. New York: NATO\/Springer-Verlag."},{"key":"168699_CR25","volume-title":"Proceedings of the Eight Annual Conference of the Cognitive Science Society","author":"J.R. Movellan","year":"1996","unstructured":"Movellan, J.R. & Prayaga, R.S. (1996). Gabor Mosaics: A description of Local Orientation Statistics with Applications to Machine Perception. In Proceedings of the Eight Annual Conference of the Cognitive Science Society. Mahwah, NJ: LEA."},{"key":"168699_CR26","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4612-0745-0","volume-title":"Bayesian learning for neural networks","author":"R.M. Neal","year":"1996","unstructured":"Neal, R.M. (1996). Bayesian learning for neural networks. New York: Springer."},{"key":"168699_CR27","unstructured":"O'Hagan, A. (1994). Kendall's Advanced Theory of Statistics: Volume 2B, Bayesian Inference. Cambridge University Press."},{"key":"168699_CR28","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1109\/TIT.1954.1057460","volume":"4","author":"W.W. Peterson","year":"1954","unstructured":"Peterson, W.W., Birdsall, T.G., & Fox, W.C. (1954). The theory of signal detectability. Transactions IRE Professional Group on Information Theory, 4, 171\u2013212.","journal-title":"Transactions IRE Professional Group on Information Theory"},{"key":"168699_CR29","doi-asserted-by":"crossref","unstructured":"Stork, D.G., Wolff, G.J., & Levine, E.P. (1992). Neural Network Lipreading System for Improved Speech Recognition. Proceedings International Joint Conference on Neural Networks (pp. 289\u2013295). IEEE.","DOI":"10.1109\/IJCNN.1992.226994"},{"key":"168699_CR30","first-page":"1027","volume-title":"Advances in Neural Information Processing Systems","author":"G.J. Wolff","year":"1994","unstructured":"Wolff, G.J., Venkatesh Prasad, K., Stork, D.G., & Hennecke, M.E. (1994). Lipreading by Neural Networks: Visual Preprocessing, Learning and Sensory Integration. In J.D. Cowan, G. Tesauro, & J. Alspector (eds.), Advances in Neural Information Processing Systems, 6, 1027\u20131034. San Mateo, CA: Morgan Kaufmann."},{"issue":"10","key":"168699_CR31","doi-asserted-by":"crossref","first-page":"921","DOI":"10.1016\/0031-3203(91)90089-N","volume":"24","author":"J. Wu","year":"1991","unstructured":"Wu, J., Tamura, S., Mitsumoto, H., Kawai, H., Kurosu, K., & Okazaki, K. (1991). Neural network vowel recognition Jointly using voice features and mouth shape image. Pattern Recognition, 24(10), 921\u2013927.","journal-title":"Pattern Recognition"},{"issue":"10","key":"168699_CR32","doi-asserted-by":"crossref","first-page":"1658","DOI":"10.1109\/5.58349","volume":"78","author":"B.P. Yuhas","year":"1990","unstructured":"Yuhas, B.P., Goldstein, M.H., Sejnowski, T.J., & Jenkins, R.E. (1990). Neural Network Models of Sensory Integration for Improved Vowel Recognition. Proc. IEEE, 78(10), 1658\u20131668.","journal-title":"Proc. IEEE"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1007468413059.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1023\/A:1007468413059\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1007468413059.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,10]],"date-time":"2025-07-10T11:36:13Z","timestamp":1752147373000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1023\/A:1007468413059"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1998,8]]},"references-count":32,"journal-issue":{"issue":"2","published-print":{"date-parts":[[1998,8]]}},"alternative-id":["168699"],"URL":"https:\/\/doi.org\/10.1023\/a:1007468413059","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"type":"print","value":"0885-6125"},{"type":"electronic","value":"1573-0565"}],"subject":[],"published":{"date-parts":[[1998,8]]},"assertion":[{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}