{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,6]],"date-time":"2025-11-06T12:15:07Z","timestamp":1762431307437,"version":"3.40.3"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319544267"},{"type":"electronic","value":"9783319544274"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-54427-4_22","type":"book-chapter","created":{"date-parts":[[2017,3,15]],"date-time":"2017-03-15T08:16:53Z","timestamp":1489565813000},"page":"290-302","source":"Crossref","is-referenced-by-count":21,"title":["Multi-view Automatic Lip-Reading Using Neural Network"],"prefix":"10.1007","author":[{"given":"Daehyun","family":"Lee","sequence":"first","affiliation":[]},{"given":"Jongmin","family":"Lee","sequence":"additional","affiliation":[]},{"given":"Kee-Eung","family":"Kim","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,3,16]]},"reference":[{"key":"22_CR1","doi-asserted-by":"crossref","first-page":"746","DOI":"10.1038\/264746a0","volume":"264","author":"H McGurk","year":"1976","unstructured":"McGurk, H., MacDonald, J.: Hearing lips and seeing voices. Nature 264, 746\u2013748 (1976)","journal-title":"Nature"},{"key":"22_CR2","unstructured":"Ngiam, J., Khosla, A., Kim, M., Nam, J., Lee, H., Ng, A.Y.: Multimodal deep learning. In: Proceedings of 28th International Conference on Machine Learning (ICML-2011), pp. 689\u2013696 (2011)"},{"key":"22_CR3","doi-asserted-by":"crossref","unstructured":"Potamianos, G., Neti, C.: Audio-visual speech recognition in challenging environments. In: INTERSPEECH (2003)","DOI":"10.21437\/Eurospeech.2003-410"},{"key":"22_CR4","doi-asserted-by":"crossref","unstructured":"Anina, I., Zhou, Z., Zhao, G., Pietik\u00e4inen, M.: Ouluvs2: a multi-view audiovisual database for non-rigid mouth motion analysis. In: 2015 11th IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (FG), vol. 1, pp. 1\u20135. IEEE (2015)","DOI":"10.1109\/FG.2015.7163155"},{"key":"22_CR5","doi-asserted-by":"crossref","first-page":"681","DOI":"10.1109\/34.927467","volume":"23","author":"TF Cootes","year":"2001","unstructured":"Cootes, T.F., Edwards, G.J., Taylor, C.J., et al.: Active appearance models. IEEE Trans. Pattern Analysis Mach. Intell. 23, 681\u2013685 (2001)","journal-title":"IEEE Trans. Pattern Analysis Mach. Intell."},{"key":"22_CR6","doi-asserted-by":"crossref","first-page":"1254","DOI":"10.1109\/TMM.2009.2030637","volume":"11","author":"G Zhao","year":"2009","unstructured":"Zhao, G., Barnard, M., Pietikainen, M.: Lipreading with local spatiotemporal descriptors. IEEE Trans. Multimedia 11, 1254\u20131265 (2009)","journal-title":"IEEE Trans. Multimedia"},{"key":"22_CR7","doi-asserted-by":"crossref","unstructured":"Shaikh, A.A., Kumar, D.K., Yau, W.C., Azemin, M.C., Gubbi, J.: Lip reading using optical flow and support vector machines. In: 2010 3rd International Congress on Image and Signal Processing (CISP), vol. 1, pp. 327\u2013330. IEEE (2010)","DOI":"10.1109\/CISP.2010.5646264"},{"key":"22_CR8","doi-asserted-by":"crossref","unstructured":"Bregler, C., Konig, Y.: Eigenlips for robust speech recognition. In: 1994 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP-1994, vol. 2, p. II-669. IEEE (1994)","DOI":"10.1109\/ICASSP.1994.389567"},{"key":"22_CR9","doi-asserted-by":"crossref","unstructured":"Dalal, N., Triggs, B.: Histograms of oriented gradients for human detection. In: 2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR 2005), vol. 1, pp. 886\u2013893. IEEE (2005)","DOI":"10.1109\/CVPR.2005.177"},{"key":"22_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/978-3-319-11755-3_3","volume-title":"Image Analysis and Recognition","author":"A Rekik","year":"2014","unstructured":"Rekik, A., Ben-Hamadou, A., Mahdi, W.: A new visual speech recognition approach for RGB-D cameras. In: Campilho, A., Kamel, M. (eds.) ICIAR 2014. LNCS, vol. 8815, pp. 21\u201328. Springer, Heidelberg (2014). doi: 10.1007\/978-3-319-11755-3_3"},{"key":"22_CR11","doi-asserted-by":"crossref","unstructured":"Pass, A., Zhang, J., Stewart, D.: An investigation into features for multi-view lipreading. In: 2010 IEEE International Conference on Image Processing, pp. 2417\u20132420. IEEE (2010)","DOI":"10.1109\/ICIP.2010.5650963"},{"key":"22_CR12","doi-asserted-by":"crossref","unstructured":"Wand, M., Koutn, J., et al.: Lipreading with long short-term memory. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6115\u20136119. IEEE (2016)","DOI":"10.1109\/ICASSP.2016.7472852"},{"key":"22_CR13","doi-asserted-by":"crossref","unstructured":"Venugopalan, S., Rohrbach, M., Donahue, J., Mooney, R., Darrell, T., Saenko, K.: Sequence to sequence-video to text. In: Proceedings of IEEE International Conference on Computer Vision, pp. 4534\u20134542 (2015)","DOI":"10.1109\/ICCV.2015.515"},{"key":"22_CR14","unstructured":"Venugopalan, S., Xu, H., Donahue, J., Rohrbach, M., Mooney, R., Saenko, K.: Translating videos to natural language using deep recurrent neural networks (2014). arXiv preprint arXiv:1412.4729"},{"key":"22_CR15","doi-asserted-by":"crossref","unstructured":"Yao, L., Torabi, A., Cho, K., Ballas, N., Pal, C., Larochelle, H., Courville, A.: Describing videos by exploiting temporal structure. In: Proceedings of IEEE International Conference on Computer Vision, pp. 4507\u20134515 (2015)","DOI":"10.1109\/ICCV.2015.512"},{"key":"22_CR16","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1113\/jphysiol.1968.sp008455","volume":"195","author":"DH Hubel","year":"1968","unstructured":"Hubel, D.H., Wiesel, T.N.: Receptive fields and functional architecture of monkey striate cortex. J. Physiol. 195, 215\u2013243 (1968)","journal-title":"J. Physiol."},{"key":"22_CR17","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9, 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"22_CR18","doi-asserted-by":"crossref","first-page":"2451","DOI":"10.1162\/089976600300015015","volume":"12","author":"FA Gers","year":"2000","unstructured":"Gers, F.A., Schmidhuber, J., Cummins, F.: Learning to forget: continual prediction with LSTM. Neural Comput. 12, 2451\u20132471 (2000)","journal-title":"Neural Comput."},{"key":"22_CR19","unstructured":"Bastien, F., Lamblin, P., Pascanu, R., Bergstra, J., Goodfellow, I., Bergeron, A., Warde-Farley, D., Bengio, Y.: Theano: new features and speed improvements (2012). arXiv preprint arXiv:1211.5590"},{"key":"22_CR20","doi-asserted-by":"crossref","first-page":"1306","DOI":"10.1109\/JPROC.2003.817150","volume":"91","author":"G Potamianos","year":"2003","unstructured":"Potamianos, G., Neti, C., Gravier, G., Garg, A., Senior, A.W.: Recent advances in the automatic recognition of audiovisual speech. Proc. IEEE 91, 1306\u20131326 (2003)","journal-title":"Proc. IEEE"},{"key":"22_CR21","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TPAMI.2013.173","volume":"36","author":"Z Zhou","year":"2014","unstructured":"Zhou, Z., Hong, X., Zhao, G., Pietik\u00e4inen, M.: A compact representation of visual speech data using latent variables. IEEE Trans. Pattern Anal. Mach. Intell. 36, 1\u20131 (2014)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"22_CR22","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava, N., Hinton, G.E., Krizhevsky, A., Sutskever, I., Salakhutdinov, R.: Dropout: a simple way to prevent neural networks from overfitting. J. Mach. Learn. Res. 15, 1929\u20131958 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"22_CR23","first-page":"115","volume":"28","author":"J Bergstra","year":"2013","unstructured":"Bergstra, J., Yamins, D., Cox, D.D.: Making a science of model search: hyperparameter optimization in hundreds of dimensions for vision architectures. ICML (1) 28, 115\u2013123 (2013)","journal-title":"ICML (1)"},{"key":"22_CR24","unstructured":"Kingma, D., Ba, J.: Adam: a method for stochastic optimization (2014). arXiv preprint arXiv:1412.6980"},{"key":"22_CR25","unstructured":"Chollet, F.: Keras (2015). https:\/\/github.com\/fchollet\/keras"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2016 Workshops"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-54427-4_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,22]],"date-time":"2023-08-22T16:51:42Z","timestamp":1692723102000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-54427-4_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319544267","9783319544274"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-54427-4_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]}}}