{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:48:55Z","timestamp":1777657735087,"version":"3.51.4"},"reference-count":27,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,3]]},"DOI":"10.1109\/icassp.2017.7952625","type":"proceedings-article","created":{"date-parts":[[2017,6,20]],"date-time":"2017-06-20T17:35:36Z","timestamp":1497980136000},"page":"2592-2596","source":"Crossref","is-referenced-by-count":91,"title":["End-to-end visual speech recognition with LSTMS"],"prefix":"10.1109","author":[{"given":"Stavros","family":"Petridis","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zuwei","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Maja","family":"Pantic","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICIS.2016.7550888"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472088"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-014-0629-7"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472852"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1121\/1.2229005"},{"key":"ref15","first-page":"863","article-title":"Learning dynamic stream weights for coupled-hmm-based audiovisual speech recognition","volume":"23","author":"abdelaziz","year":"2015","journal-title":"IEEE Trans on Audio Sp and Lang Proc"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2015.7163155"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1155\/S1110865702206101"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.241"},{"key":"ref19","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1126\/science.1127647","article-title":"Reducing the dimensionality of data with neural networks","volume":"313","author":"hinton","year":"2006","journal-title":"Science"},{"key":"ref4","first-page":"689","article-title":"Multimodal deep learning","author":"ngiam","year":"2011","journal-title":"Proc of ICML"},{"key":"ref27","author":"zeiler","year":"2012","journal-title":"ADADELTA An Adaptive Learning Rate Method"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995345"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178224"},{"key":"ref5","article-title":"Integration of deep bottleneck features for audio-visual speech recognition","author":"ninomiya","year":"2015","journal-title":"Conf of the International Speech Communication Association"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178347"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639140"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/6046.865479"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.26"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2003.817150"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"599","DOI":"10.1007\/978-3-642-35289-8_32","author":"hinton","year":"2012","journal-title":"Neural Networks Tricks of the Trade"},{"key":"ref22","year":"0"},{"key":"ref21","first-page":"175","volume":"3","author":"young","year":"2002","journal-title":"The HTK Book"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/MMSP.2007.4412868"},{"key":"ref23","first-page":"2949","article-title":"Multimodal learning with deep boltzmann machines","volume":"15","author":"srivastava","year":"2014","journal-title":"J Mach Learn Res"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2008.2011515"},{"key":"ref25","first-page":"79","article-title":"Patch-based representation of visual speech","author":"lucey","year":"2006","journal-title":"Proceedings of the HCSNet Workshop on the use of Vision in HCI"}],"event":{"name":"2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"New Orleans, LA","start":{"date-parts":[[2017,3,5]]},"end":{"date-parts":[[2017,3,9]]}},"container-title":["2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7943262\/7951776\/07952625.pdf?arnumber=7952625","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,26]],"date-time":"2019-09-26T07:23:58Z","timestamp":1569482638000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7952625\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,3]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/icassp.2017.7952625","relation":{},"subject":[],"published":{"date-parts":[[2017,3]]}}}