{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,11]],"date-time":"2025-04-11T08:52:12Z","timestamp":1744361532107,"version":"3.40.3"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031320286"},{"type":"electronic","value":"9783031320293"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-32029-3_17","type":"book-chapter","created":{"date-parts":[[2023,5,15]],"date-time":"2023-05-15T11:57:09Z","timestamp":1684151829000},"page":"180-195","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["A Review on\u00a0Deep Learning-Based Automatic Lipreading"],"prefix":"10.1007","author":[{"given":"Carlos","family":"Santos","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3458-7693","authenticated-orcid":false,"given":"Ant\u00f3nio","family":"Cunha","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4383-0472","authenticated-orcid":false,"given":"Paulo","family":"Coelho","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,5,14]]},"reference":[{"key":"17_CR1","volume-title":"Spoken Language Processing: A Guide to Theory, Algorithm, and System Development","author":"X Huang","year":"2001","unstructured":"Huang, X., Acero, A., Hon, H.-W.: Spoken Language Processing: A Guide to Theory, Algorithm, and System Development. Prentice Hall PTR, Upper Saddle River (2001)"},{"key":"17_CR2","doi-asserted-by":"crossref","unstructured":"Das, S.K., Nandakishor, S., Pati, D.: Automatic lip contour extraction using pixel-based segmentation and piece-wise polynomial fitting. In: 2017 14th IEEE India Council International Conference (INDICON), Roorkee. IEEE, pp. 1\u20135 (2017). https:\/\/ieeexplore.ieee.org\/document\/8487538\/","DOI":"10.1109\/INDICON.2017.8487538"},{"key":"17_CR3","unstructured":"Bauman, N.: Speechreading (Lip-Reading) (2011). https:\/\/hearinglosshelp.com\/blog\/speechreading-lip-reading\/"},{"key":"17_CR4","unstructured":"Petajan, E.D.: Automatic lipreading to enhance speech recognition. In: Degree of Doctor of Philosophy in Electrica l Engineering, University of Illinois, Urbana-Champaign (1984)"},{"key":"17_CR5","doi-asserted-by":"crossref","unstructured":"Huang, H., et al.: A novel machine lip reading model. Procedia Comput. Sci. 199, 1432\u20131437 (2022). https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S187705092200182X","DOI":"10.1016\/j.procs.2022.01.181"},{"key":"17_CR6","unstructured":"Assael, Y.M., Shillingford, B., Whiteson, S., de Freitas, N.: LipNet: end-to-end sentence-level lipreading (2016). arXiv:1611.01599"},{"key":"17_CR7","unstructured":"Petridis, S., Wang, Y., Ma, P., Li, Z., Pantic, M.: End-to-end visual speech recognition for small-scale datasets (2019). arXiv Version Number: 4. 
https:\/\/arxiv.org\/abs\/1904.01954"},{"key":"17_CR8","doi-asserted-by":"crossref","unstructured":"Fung, I., Mak, B.: End-to-end low-resource lip-reading with maxout Cnn and Lstm. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Calgary, AB. IEEE, pp. 2511\u20132515 (2018). https:\/\/ieeexplore.ieee.org\/document\/8462280\/","DOI":"10.1109\/ICASSP.2018.8462280"},{"key":"17_CR9","doi-asserted-by":"crossref","unstructured":"Prajwal, K.R., Afouras, T., Zisserman, A.: Sub-word level lip reading with visual attention (2021). arXiv:2110.07603","DOI":"10.1109\/CVPR52688.2022.00510"},{"key":"17_CR10","doi-asserted-by":"crossref","unstructured":"Fenghour, S., Chen, D., Guo, K., Li, B., Xiao, P.: Deep learning-based automated lip-reading: a survey. IEEE Access, 9 121184\u2013121205 (2021). https:\/\/ieeexplore.ieee.org\/document\/9522117\/","DOI":"10.1109\/ACCESS.2021.3107946"},{"key":"17_CR11","doi-asserted-by":"crossref","unstructured":"Hao, M., Mamut, M., Ubul, K.: A survey of lipreading methods based on deep learning. In: 2020 2nd International Conference on Image Processing and Machine Vision, Bangkok Thailand. ACM, pp. 31\u201339 (2020). https:\/\/dl.acm.org\/doi\/10.1145\/3421558.3421563","DOI":"10.1145\/3421558.3421563"},{"key":"17_CR12","doi-asserted-by":"crossref","unstructured":"Alam, M., Samad, M., Vidyaratne, L., Glandon, A., Iftekharuddin, K.: Survey on deep neural networks in speech and vision systems. Neurocomputing 417, 302\u2013321 (2020). https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231220311619","DOI":"10.1016\/j.neucom.2020.07.053"},{"key":"17_CR13","series-title":"Lecture Notes in Networks and Systems","doi-asserted-by":"publisher","first-page":"307","DOI":"10.1007\/978-981-13-2514-4_26","volume-title":"Data Analytics and Learning","author":"S Bhaskar","year":"2019","unstructured":"Bhaskar, S., Thasleema, T.M., Rajesh, R.: A survey on different visual speech recognition techniques. In: Nagabhushan, P., Guru, D.S., Shekar, B.H., Kumar, Y.H.S. (eds.) Data Analytics and Learning. LNNS, vol. 43, pp. 307\u2013316. Springer, Singapore (2019). https:\/\/doi.org\/10.1007\/978-981-13-2514-4_26"},{"key":"17_CR14","doi-asserted-by":"crossref","unstructured":"Fernandez-Lopez, A., Sukno, F.M.: Survey on automatic lip-reading in the era of deep learning. Image Vis. Comput. 78, 53\u201372 (2018). https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0262885618301276","DOI":"10.1016\/j.imavis.2018.07.002"},{"key":"17_CR15","doi-asserted-by":"crossref","unstructured":"Fernandez-Lopez, A., Martinez, O., Sukno, F.M.: Towards estimating the upper bound of visual-speech recognition: the visual lip-reading feasibility database. In: 2017 12th IEEE International Conference on Automatic Face & Gesture Recognition, Washington, DC, USA. IEEE, pp. 208\u2013215 (2017). http:\/\/ieeexplore.ieee.org\/document\/7961743\/","DOI":"10.1109\/FG.2017.34"},{"key":"17_CR16","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Yang, S., Xiao, J., Shan, S., Chen, X.: Can we read speech beyond the lips? Rethinking RoI selection for deep visual speech recognition (2020). arXiv Version Number: 2. https:\/\/arxiv.org\/abs\/2003.03206","DOI":"10.1109\/FG47880.2020.00134"},{"key":"17_CR17","doi-asserted-by":"crossref","unstructured":"Lu, Y., Zhu, X., Xiao, K.: Unsupervised lip segmentation based on quad-tree MRF framework in wavelet domain. Measurement 141, 95\u2013101 (2019). 
{"key":"17_CR18","doi-asserted-by":"crossref","unstructured":"Lu, Y., Liu, Q.: Lip segmentation using automatic selected initial contours based on localized active contour model. EURASIP J. Image Video Process. 2018(1), 7 (2018). https:\/\/jivp-eurasipjournals.springeropen.com\/articles\/10.1186\/s13640-017-0243-9","DOI":"10.1186\/s13640-017-0243-9"},{"key":"17_CR19","doi-asserted-by":"crossref","unstructured":"Radha, N., Shahina, A., Khan, N.: Visual speech recognition using fusion of motion and geometric features. Procedia Comput. Sci. 171, 924\u2013933 (2020). https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1877050920310760","DOI":"10.1016\/j.procs.2020.04.100"},{"key":"17_CR20","unstructured":"Weng, X., Kitani, K.: Learning spatio-temporal features with two-stream deep 3D CNNs for lipreading (2019). arXiv:1905.02540. http:\/\/arxiv.org\/abs\/1905.02540"},{"key":"17_CR21","doi-asserted-by":"crossref","unstructured":"Lu, Y., Yan, J.: Automatic lip reading using convolution neural network and bidirectional long short-term memory. Int. J. Pattern Recog. Artif. Intell. 34(01), 2054003 (2020). https:\/\/www.worldscientific.com\/doi\/abs\/10.1142\/S0218001420540038","DOI":"10.1142\/S0218001420540038"},{"key":"17_CR22","doi-asserted-by":"crossref","unstructured":"Mesbah, A., Berrahou, A., Hammouchi, H., Berbia, H., Qjidaa, H., Daoudi, M.: Lip reading with Hahn convolutional neural networks. Image Vis. Comput. 88, 76\u201383 (2019). https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0262885619300605","DOI":"10.1016\/j.imavis.2019.04.010"},{"key":"17_CR23","doi-asserted-by":"crossref","unstructured":"Ma, X., Zhang, H., Li, Y.: Feature extraction method for lip-reading under variant lighting conditions. In: Proceedings of the 9th International Conference on Machine Learning and Computing, Singapore. ACM, pp. 320\u2013326 (2017). https:\/\/dl.acm.org\/doi\/10.1145\/3055635.3056576","DOI":"10.1145\/3055635.3056576"},{"key":"17_CR24","doi-asserted-by":"crossref","unstructured":"Jeon, S., Elsharkawy, A., Kim, M.S.: Lipreading architecture based on multiple convolutional neural networks for sentence-level visual speech recognition. Sensors 22(1), 72 (2021). https:\/\/www.mdpi.com\/1424-8220\/22\/1\/72","DOI":"10.3390\/s22010072"},{"key":"17_CR25","unstructured":"Wang, C.: Multi-grained spatio-temporal modeling for lip-reading (2019). arXiv:1908.11618. https:\/\/arxiv.org\/abs\/1908.11618"},{"key":"17_CR26","doi-asserted-by":"crossref","unstructured":"Fenghour, S., Chen, D., Guo, K., Xiao, P.: Lip reading sentences using deep learning with only visual cues. IEEE Access 8, 215516\u2013215530 (2020). https:\/\/ieeexplore.ieee.org\/document\/9272286\/","DOI":"10.1109\/ACCESS.2020.3040906"},{"key":"17_CR27","doi-asserted-by":"crossref","unstructured":"Fenghour, S., Chen, D., Guo, K., Li, B., Xiao, P.: An effective conversion of visemes to words for high-performance automatic lipreading. Sensors 21(23), 7890 (2021). https:\/\/www.mdpi.com\/1424-8220\/21\/23\/7890","DOI":"10.3390\/s21237890"},{"key":"17_CR28","doi-asserted-by":"crossref","unstructured":"Martinez, B., Ma, P., Petridis, S., Pantic, M.: Lipreading using temporal convolutional networks (2020). arXiv:2001.08702. https:\/\/arxiv.org\/abs\/2001.08702","DOI":"10.1109\/ICASSP40776.2020.9053841"},{"key":"17_CR29","doi-asserted-by":"crossref","unstructured":"Lu, Y., Li, H.: Automatic lip-reading system based on deep convolutional neural network and attention-based long short-term memory. Appl. Sci. 9(8), 1599 (2019). https:\/\/www.mdpi.com\/2076-3417\/9\/8\/1599","DOI":"10.3390\/app9081599"},{"key":"17_CR30","doi-asserted-by":"crossref","unstructured":"Afouras, T., Chung, J.S., Zisserman, A.: ASR is all you need: cross-modal distillation for lip reading (2020). arXiv:1911.12747 [cs, eess]. http:\/\/arxiv.org\/abs\/1911.12747","DOI":"10.1109\/ICASSP40776.2020.9054253"},{"key":"17_CR31","doi-asserted-by":"crossref","unstructured":"Gupta, A.K., Gupta, P., Rahtu, E.: FATALRead - fooling visual speech recognition models: put words on lips. Appl. Intell. (2021). https:\/\/link.springer.com\/10.1007\/s10489-021-02846-w","DOI":"10.1007\/s10489-021-02846-w"}],"container-title":["Lecture Notes of the Institute for Computer Sciences, Social Informatics and Telecommunications Engineering","Wireless Mobile Communication and Healthcare"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-32029-3_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,15]],"date-time":"2023-05-15T12:05:58Z","timestamp":1684152358000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-32029-3_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031320286","9783031320293"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-32029-3_17","relation":{},"ISSN":["1867-8211","1867-822X"],"issn-type":[{"type":"print","value":"1867-8211"},{"type":"electronic","value":"1867-822X"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"14 May 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MobiHealth","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Wireless Mobile Communication and Healthcare","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 November 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 December 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mobihealth2022a","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Confy +","order":2,"name":"conference_management_system","label":"Conference Management System",
System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"75","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"30","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"40% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}