{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T19:41:14Z","timestamp":1749843674265,"version":"3.28.0"},"reference-count":36,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,1,19]]},"DOI":"10.1109\/slt48900.2021.9383540","type":"proceedings-article","created":{"date-parts":[[2021,3,25]],"date-time":"2021-03-25T16:46:54Z","timestamp":1616690814000},"page":"613-620","source":"Crossref","is-referenced-by-count":8,"title":["Speaker-Independent Visual Speech Recognition with the Inception V3 Model"],"prefix":"10.1109","author":[{"given":"Timothy","family":"Israel Santos","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andrew","family":"Abel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nick","family":"Wilson","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yan","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-13509-0"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICBDA.2019.8713256"},{"key":"ref31","article-title":"Comparison of human and machine-based lip-reading","author":"hilder","year":"2009","journal-title":"AVSPN"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.3758\/BF03204211"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.319"},{"article-title":"The building blocks of interpretability. distill","year":"2018","author":"olah","key":"ref35"},{"key":"ref34","first-page":"1033","article-title":"Fast Lip Feature Extraction Using Psychologically Motivated Gabor Features","volume":"1","author":"abel","year":"2019","journal-title":"Proceedings of the 2018 IEEE Symposium Series on Computational Intelligence SSCI 2018"},{"key":"ref10","first-page":"87","article-title":"Lip reading in the wild","author":"son chung","year":"2016","journal-title":"Asian Conference on Computer Vision"},{"key":"ref11","article-title":"Lip reading in profile","author":"chung","year":"2017","journal-title":"British Machine Vision Conference"},{"key":"ref12","article-title":"Lip reading sentences in the wild","author":"son chung","year":"2017","journal-title":"The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-85"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462280"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461326"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2018.00088"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178347"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2017.2761539"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461326"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1117\/12.2522936"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2004.10.011"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472852"},{"key":"ref3","article-title":"Deep audiovisual speech recognition","author":"afouras","year":"2018","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1155\/2007\/47891"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-39431-8_16"},{"key":"ref5","article-title":"Audio-visual scene analysis: evidence for a \"very-early\" integration process in audiovisual speech perception","author":"schwartz","year":"2002","journal-title":"Seventh International Conference on Spoken Language Processing"},{"article-title":"Lipnet: Endto-end sentence-level lipreading","year":"2016","author":"assael","key":"ref8"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICIS.2016.7550888"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/264746a0"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1121\/1.1907309"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICISCE.2016.91"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.351"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1121\/1.2229005"},{"year":"2019","key":"ref24","article-title":"Google ai notebook platform"},{"key":"ref23","first-page":"630","article-title":"Identity mappings in deep residual networks","author":"he","year":"2016","journal-title":"European Conference on Computer Vision"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2018.07.002"},{"article-title":"Multimodal feature fusion and deep learning with audio-visual speech recognition github repository","year":"2019","author":"santos","key":"ref25"}],"event":{"name":"2021 IEEE Spoken Language Technology Workshop (SLT)","start":{"date-parts":[[2021,1,19]]},"location":"Shenzhen, China","end":{"date-parts":[[2021,1,22]]}},"container-title":["2021 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9383468\/9383452\/09383540.pdf?arnumber=9383540","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,5,31]],"date-time":"2021-05-31T17:30:35Z","timestamp":1622482235000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9383540\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,19]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/slt48900.2021.9383540","relation":{},"subject":[],"published":{"date-parts":[[2021,1,19]]}}}