{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T02:47:17Z","timestamp":1770346037884,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":111,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,5,6]],"date-time":"2021-05-06T00:00:00Z","timestamp":1620259200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,5,6]]},"DOI":"10.1145\/3411764.3445565","type":"proceedings-article","created":{"date-parts":[[2021,5,8]],"date-time":"2021-05-08T05:28:50Z","timestamp":1620451730000},"page":"1-19","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":32,"title":["LipType: A Silent Speech Recognizer Augmented with an Independent Repair Model"],"prefix":"10.1145","author":[{"given":"Laxmi","family":"Pandey","sequence":"first","affiliation":[{"name":"Human Computer Interaction Group University of California, Merced, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ahmed Sabbir","family":"Arif","sequence":"additional","affiliation":[{"name":"Human-Computer Interaction Group University of California, Merced, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,5,7]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Tensorflow: Large-Scale Machine Learning on Heterogeneous Distributed Systems. (March","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi , Ashish Agarwal , Paul Barham , Eugene Brevdo , Zhifeng Chen , Craig Citro , Greg\u00a0 S. Corrado , Andy Davis , Jeffrey Dean , Matthieu Devin , Sanjay Ghemawat , Ian Goodfellow , Andrew Harp , Geoffrey Irving , Michael Isard , Yangqing Jia , Rafal Jozefowicz , Lukasz Kaiser , Manjunath Kudlur , Josh Levenberg , Dan Mane , Rajat Monga , Sherry Moore , Derek Murray , Chris Olah , Mike Schuster , Jonathon Shlens , Benoit Steiner , Ilya Sutskever , Kunal Talwar , Paul Tucker , Vincent Vanhoucke , Vijay Vasudevan , Fernanda Viegas , Oriol Vinyals , Pete Warden , Martin Wattenberg , Martin Wicke , Yuan Yu , and Xiaoqiang Zheng . 2016 . Tensorflow: Large-Scale Machine Learning on Heterogeneous Distributed Systems. (March 2016). http:\/\/arxiv.org\/abs\/1603.04467 Mart\u00edn Abadi, Ashish Agarwal, Paul Barham, Eugene Brevdo, Zhifeng Chen, Craig Citro, Greg\u00a0S. Corrado, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Ian Goodfellow, Andrew Harp, Geoffrey Irving, Michael Isard, Yangqing Jia, Rafal Jozefowicz, Lukasz Kaiser, Manjunath Kudlur, Josh Levenberg, Dan Mane, Rajat Monga, Sherry Moore, Derek Murray, Chris Olah, Mike Schuster, Jonathon Shlens, Benoit Steiner, Ilya Sutskever, Kunal Talwar, Paul Tucker, Vincent Vanhoucke, Vijay Vasudevan, Fernanda Viegas, Oriol Vinyals, Pete Warden, Martin Wattenberg, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng. 2016. Tensorflow: Large-Scale Machine Learning on Heterogeneous Distributed Systems. (March 2016). http:\/\/arxiv.org\/abs\/1603.04467"},{"key":"e_1_3_2_2_2_1","volume-title":"Learning to Correct Overexposed and Underexposed Photos. (March","author":"Afifi Mahmoud","year":"2020","unstructured":"Mahmoud Afifi , Konstantinos\u00a0 G. Derpanis , Bj\u00f6rn Ommer , and Michael\u00a0 S. Brown . 2020. Learning to Correct Overexposed and Underexposed Photos. (March 2020 ). http:\/\/arxiv.org\/abs\/2003.11596 Mahmoud Afifi, Konstantinos\u00a0G. Derpanis, Bj\u00f6rn Ommer, and Michael\u00a0S. Brown. 2020. Learning to Correct Overexposed and Underexposed Photos. (March 2020). http:\/\/arxiv.org\/abs\/2003.11596"},{"key":"e_1_3_2_2_3_1","volume-title":"Deep Lip Reading: A Comparison of Models and an Online Application. (June","author":"Afouras Triantafyllos","year":"2018","unstructured":"Triantafyllos Afouras , Joon\u00a0Son Chung , and Andrew Zisserman . 2018. Deep Lip Reading: A Comparison of Models and an Online Application. (June 2018 ). http:\/\/arxiv.org\/abs\/1806.06053 Triantafyllos Afouras, Joon\u00a0Son Chung, and Andrew Zisserman. 2018. Deep Lip Reading: A Comparison of Models and an Online Application. (June 2018). http:\/\/arxiv.org\/abs\/1806.06053"},{"key":"e_1_3_2_2_4_1","unstructured":"Abien\u00a0Fred Agarap. 2019. Deep Learning using Rectified Linear Units (ReLU). (2019). http:\/\/arxiv.org\/abs\/1803.08375  Abien\u00a0Fred Agarap. 2019. Deep Learning using Rectified Linear Units (ReLU). (2019). http:\/\/arxiv.org\/abs\/1803.08375"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"crossref","unstructured":"Alexandre Allauzen. 2007. Error Detection in Confusion Network. In INTERSPEECH.  Alexandre Allauzen. 2007. Error Detection in Confusion Network. In INTERSPEECH.","DOI":"10.21437\/Interspeech.2007-490"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472172"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2009.2021548"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIC-STH.2009.5444533"},{"key":"e_1_3_2_2_9_1","volume-title":"Lipnet: End-to-End Sentence-Level Lipreading. (Dec.","author":"Assael M.","year":"2016","unstructured":"Yannis\u00a0 M. Assael , Brendan Shillingford , Shimon Whiteson , and Nando de Freitas . 2016 . Lipnet: End-to-End Sentence-Level Lipreading. (Dec. 2016). http:\/\/arxiv.org\/abs\/1611.01599 Yannis\u00a0M. Assael, Brendan Shillingford, Shimon Whiteson, and Nando de Freitas. 2016. Lipnet: End-to-End Sentence-Level Lipreading. (Dec. 2016). http:\/\/arxiv.org\/abs\/1611.01599"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jneumeth.2008.06.030"},{"key":"e_1_3_2_2_11_1","volume-title":"Asr Context-Sensitive Error Correction Based on Microsoft N-Gram Dataset. (March","author":"Bassil Youssef","year":"2012","unstructured":"Youssef Bassil and Paul Semaan . 2012. Asr Context-Sensitive Error Correction Based on Microsoft N-Gram Dataset. (March 2012 ). http:\/\/arxiv.org\/abs\/1203.5262 Youssef Bassil and Paul Semaan. 2012. Asr Context-Sensitive Error Correction Based on Microsoft N-Gram Dataset. (March 2012). http:\/\/arxiv.org\/abs\/1203.5262"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.3390\/app9183870"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2010.01.001"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2011.2157513"},{"key":"e_1_3_2_2_15_1","volume-title":"Learning to See in the Dark. (May","author":"Chen Chen","year":"2018","unstructured":"Chen Chen , Qifeng Chen , Jia Xu , and Vladlen Koltun . 2018. Learning to See in the Dark. (May 2018 ). http:\/\/arxiv.org\/abs\/1805.01934 Chen Chen, Qifeng Chen, Jia Xu, and Vladlen Koltun. 2018. Learning to See in the Dark. (May 2018). http:\/\/arxiv.org\/abs\/1805.01934"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.3115\/981863.981904"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639104"},{"key":"e_1_3_2_2_18_1","unstructured":"Junyoung Chung Caglar Gulcehre KyungHyun Cho and Yoshua Bengio. 2014. Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling. (2014). http:\/\/arxiv.org\/abs\/1412.3555  Junyoung Chung Caglar Gulcehre KyungHyun Cho and Yoshua Bengio. 2014. Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling. (2014). http:\/\/arxiv.org\/abs\/1412.3555"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.367"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-54427-4_19"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.5244\/C.31.155"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-54184-6_6"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2018.02.001"},{"key":"e_1_3_2_2_24_1","unstructured":"C. Cieri D. Miller and K. Walker. 2004. The Fisher Corpus: A Resource for the Next Generations of Speech-to-Text. In LREC.  C. Cieri D. Miller and K. Walker. 2004. The Fisher Corpus: A Resource for the Next Generations of Speech-to-Text. In LREC."},{"key":"e_1_3_2_2_25_1","unstructured":"Ronan Collobert Awni Hannun and Gabriel Synnaeve. 2019. A Fully Differentiable Beam Search Decoder. (2019). http:\/\/arxiv.org\/abs\/1902.06022  Ronan Collobert Awni Hannun and Gabriel Synnaeve. 2019. A Fully Differentiable Beam Search Decoder. (2019). http:\/\/arxiv.org\/abs\/1902.06022"},{"key":"e_1_3_2_2_26_1","volume-title":"Wav2letter: An End-to-End Convnet-Based Speech Recognition System. (Sept","author":"Collobert Ronan","year":"2016","unstructured":"Ronan Collobert , Christian Puhrsch , and Gabriel Synnaeve . 2016. Wav2letter: An End-to-End Convnet-Based Speech Recognition System. (Sept . 2016 ). http:\/\/arxiv.org\/abs\/1609.03193 Ronan Collobert, Christian Puhrsch, and Gabriel Synnaeve. 2016. Wav2letter: An End-to-End Convnet-Based Speech Recognition System. (Sept. 2016). http:\/\/arxiv.org\/abs\/1609.03193"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1121\/1.2229005"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/1592700.1592731"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2006.1660033"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2004.1326078"},{"key":"e_1_3_2_2_31_1","volume-title":"Spatio-Temporal Channel Correlation Networks for Action Classification. (Feb","author":"Diba Ali","year":"2019","unstructured":"Ali Diba , Mohsen Fayyaz , Vivek Sharma , M.\u00a0 Mahdi Arzani , Rahman Yousefzadeh , Juergen Gall , and Luc Van\u00a0Gool . 2019. Spatio-Temporal Channel Correlation Networks for Action Classification. (Feb . 2019 ). http:\/\/arxiv.org\/abs\/1806.07754 Ali Diba, Mohsen Fayyaz, Vivek Sharma, M.\u00a0Mahdi Arzani, Rahman Yousefzadeh, Juergen Gall, and Luc Van\u00a0Gool. 2019. Spatio-Temporal Channel Correlation Networks for Action Classification. (Feb. 2019). http:\/\/arxiv.org\/abs\/1806.07754"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2011.6012107"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.medengphy.2007.05.003"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853900"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"crossref","unstructured":"Victoria\u00a0M. Florescu L. Crevier-Buchman B. Denby T. Hueber Antonia Colazo-Simon Claire Pillot-Loiseau P. Roussel-Ragot C. Gendrot and S. Quattrocchi. 2010. Silent Vs Vocalized Articulation for a Portable Ultrasound-Based Silent Speech Interface. In INTERSPEECH.  Victoria\u00a0M. Florescu L. Crevier-Buchman B. Denby T. Hueber Antonia Colazo-Simon Claire Pillot-Loiseau P. Roussel-Ragot C. Gendrot and S. Quattrocchi. 2010. Silent Vs Vocalized Articulation for a Portable Ultrasound-Based Silent Speech Interface. In INTERSPEECH.","DOI":"10.21437\/Interspeech.2010-195"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.304"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.5555\/2832415.2832424"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.medengphy.2010.08.011"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1992.225858"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"e_1_3_2_2_41_1","volume-title":"Proceedings of the 31st International Conference on International Conference on Machine Learning -","volume":"1772","author":"Graves Alex","year":"2014","unstructured":"Alex Graves and Navdeep Jaitly . 2014 . Towards End-to-End Speech Recognition with Recurrent Neural Networks . In Proceedings of the 31st International Conference on International Conference on Machine Learning - Volume 32(ICML\u201914). JMLR.org, Beijing, China, II\u20131764\u2013II\u2013 1772 . Alex Graves and Navdeep Jaitly. 2014. Towards End-to-End Speech Recognition with Recurrent Neural Networks. In Proceedings of the 31st International Conference on International Conference on Machine Learning - Volume 32(ICML\u201914). JMLR.org, Beijing, China, II\u20131764\u2013II\u20131772."},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2016.2639450"},{"key":"e_1_3_2_2_43_1","volume-title":"Deep Speech: Scaling up End-to-End Speech Recognition. (Dec.","author":"Hannun Awni","year":"2014","unstructured":"Awni Hannun , Carl Case , Jared Casper , Bryan Catanzaro , Greg Diamos , Erich Elsen , Ryan Prenger , Sanjeev Satheesh , Shubho Sengupta , Adam Coates , and Andrew\u00a0 Y. Ng . 2014 . Deep Speech: Scaling up End-to-End Speech Recognition. (Dec. 2014). http:\/\/arxiv.org\/abs\/1412.5567 Awni Hannun, Carl Case, Jared Casper, Bryan Catanzaro, Greg Diamos, Erich Elsen, Ryan Prenger, Sanjeev Satheesh, Shubho Sengupta, Adam Coates, and Andrew\u00a0Y. Ng. 2014. Deep Speech: Scaling up End-to-End Speech Recognition. (Dec. 2014). http:\/\/arxiv.org\/abs\/1412.5567"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_45_1","unstructured":"Kenneth Heafield Ivan Pouzyrevsky J. Clark and Philipp Koehn. 2013. Scalable Modified Kneser-Ney Language Model Estimation. In ACL.  Kenneth Heafield Ivan Pouzyrevsky J. Clark and Philipp Koehn. 2013. Scalable Modified Kneser-Ney Language Model Estimation. In ACL."},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5946965"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1155\/2007"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.12.001"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2010.579"},{"key":"e_1_3_2_2_50_1","volume-title":"Squeeze-and-Excitation Networks. (May","author":"Hu Jie","year":"2019","unstructured":"Jie Hu , Li Shen , Samuel Albanie , Gang Sun , and Enhua Wu. 2019. Squeeze-and-Excitation Networks. (May 2019 ). http:\/\/arxiv.org\/abs\/1709.01507 Jie Hu, Li Shen, Samuel Albanie, Gang Sun, and Enhua Wu. 2019. Squeeze-and-Excitation Networks. (May 2019). http:\/\/arxiv.org\/abs\/1709.01507"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2007.366140"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.11.004"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.11.004"},{"key":"e_1_3_2_2_54_1","volume-title":"Proceedings of the 32nd International Conference on International Conference on Machine Learning -","volume":"37","author":"Ioffe Sergey","year":"2015","unstructured":"Sergey Ioffe and Christian Szegedy . 2015 . Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift . In Proceedings of the 32nd International Conference on International Conference on Machine Learning - Volume 37 (Lille, France , 2015-07-06) (ICML\u201915). JMLR.org, 448\u2013456. Sergey Ioffe and Christian Szegedy. 2015. Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. In Proceedings of the 32nd International Conference on International Conference on Machine Learning - Volume 37 (Lille, France, 2015-07-06) (ICML\u201915). JMLR.org, 448\u2013456."},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.59"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/83.597272"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.11.003"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2003.1224072"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"crossref","unstructured":"S. Jou Tanja Schultz Matthias Walliczek F. Kraft and Alexander\u00a0H. Waibel. 2006. Towards Continuous Speech Recognition Using Surface Electromyography. In INTERSPEECH.  S. Jou Tanja Schultz Matthias Walliczek F. Kraft and Alexander\u00a0H. Waibel. 2006. Towards Continuous Speech Recognition Using Surface Electromyography. In INTERSPEECH.","DOI":"10.21437\/Interspeech.2006-212"},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3172944.3172977"},{"key":"e_1_3_2_2_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300376"},{"key":"e_1_3_2_2_62_1","article-title":"Dlib-Ml: A Machine Learning Toolkit","author":"King E.","year":"2009","unstructured":"Davis\u00a0 E. King . 2009 . Dlib-Ml: A Machine Learning Toolkit . The Journal of Machine Learning Research 10 ( Dec. 2009), 1755\u20131758. Davis\u00a0E. King. 2009. Dlib-Ml: A Machine Learning Toolkit. The Journal of Machine Learning Research 10 (Dec. 2009), 1755\u20131758.","journal-title":"The Journal of Machine Learning Research 10"},{"key":"e_1_3_2_2_63_1","volume-title":"Kingma and Jimmy Ba","author":"P.","year":"2017","unstructured":"Diederik\u00a0 P. Kingma and Jimmy Ba . 2017 . Adam : A Method for Stochastic Optimization . (2017). http:\/\/arxiv.org\/abs\/1412.6980 Diederik\u00a0P. Kingma and Jimmy Ba. 2017. Adam: A Method for Stochastic Optimization. (2017). http:\/\/arxiv.org\/abs\/1412.6980"},{"key":"e_1_3_2_2_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2015.69"},{"key":"e_1_3_2_2_65_1","doi-asserted-by":"publisher","DOI":"10.1364\/JOSA.61.000001"},{"key":"e_1_3_2_2_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2013.2284059"},{"key":"e_1_3_2_2_67_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2018.01.010"},{"key":"e_1_3_2_2_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2014.7078617"},{"key":"e_1_3_2_2_69_1","doi-asserted-by":"crossref","unstructured":"X. Lu Y. Tsao S. Matsuda and C. Hori. 2013. Speech Enhancement Based on Deep Denoising Autoencoder. In INTERSPEECH.  X. Lu Y. Tsao S. Matsuda and C. Hori. 2013. Speech Enhancement Based on Deep Denoising Autoencoder. In INTERSPEECH.","DOI":"10.21437\/Interspeech.2013-130"},{"key":"e_1_3_2_2_70_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1"},{"key":"e_1_3_2_2_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/765891.765971"},{"key":"e_1_3_2_2_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2005.1566521"},{"key":"e_1_3_2_2_73_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2003.1200069"},{"key":"e_1_3_2_2_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2000.861925"},{"key":"e_1_3_2_2_75_1","doi-asserted-by":"crossref","unstructured":"K. Noda Y. Yamaguchi K. Nakadai H. Okuno and Tetsuya Ogata. 2014. Lipreading Using Convolutional Neural Network. In INTERSPEECH.  K. Noda Y. Yamaguchi K. Nakadai H. Okuno and Tetsuya Ogata. 2014. Lipreading Using Convolutional Neural Network. In INTERSPEECH.","DOI":"10.21437\/Interspeech.2014-293"},{"key":"e_1_3_2_2_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"e_1_3_2_2_77_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.11.006"},{"key":"e_1_3_2_2_78_1","first-page":"92","volume-title":"The Design for the Wall Street Journal-Based Csr Corpus. In Speech and Natural Language: Proceedings of a Workshop Held at Harriman","author":"B.","year":"1992","unstructured":"Douglas\u00a0 B. Paul and Janet\u00a0M. Baker. 1992 . The Design for the Wall Street Journal-Based Csr Corpus. In Speech and Natural Language: Proceedings of a Workshop Held at Harriman , New York , February 23-26, 1992 . https:\/\/www.aclweb.org\/anthology\/H 92 - 1073 Douglas\u00a0B. Paul and Janet\u00a0M. Baker. 1992. The Design for the Wall Street Journal-Based Csr Corpus. In Speech and Natural Language: Proceedings of a Workshop Held at Harriman, New York, February 23-26, 1992. https:\/\/www.aclweb.org\/anthology\/H92-1073"},{"key":"e_1_3_2_2_79_1","doi-asserted-by":"publisher","DOI":"10.5555\/1987717.1987724"},{"key":"e_1_3_2_2_80_1","doi-asserted-by":"crossref","unstructured":"T. Pellegrini and I. Trancoso. 2010. Improving Asr Error Detection with Non-Decoder Based Features. In INTERSPEECH.  T. Pellegrini and I. Trancoso. 2010. Improving Asr Error Detection with Non-Decoder Based Features. In INTERSPEECH.","DOI":"10.21437\/Interspeech.2010-98"},{"key":"e_1_3_2_2_81_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472088"},{"key":"e_1_3_2_2_82_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472088"},{"key":"e_1_3_2_2_83_1","doi-asserted-by":"publisher","DOI":"10.5220\/0001554303760381"},{"key":"e_1_3_2_2_84_1","doi-asserted-by":"publisher","DOI":"10.5220\/0001554303760381"},{"key":"e_1_3_2_2_85_1","unstructured":"Daniel Povey Arnab Ghoshal Gilles Boulianne Lukas Burget Ondrej Glembek Nagendra Goel Mirko Hannemann Petr Motlicek Yanmin Qian Petr Schwarz Jan Silovsky Georg Stemmer and Karel Vesely. 2011. The Kaldi Speech Recognition Toolkit. https:\/\/infoscience.epfl.ch\/record\/192584 Conference Name: IEEE 2011 Workshop on Automatic Speech Recognition and Understanding Number: CONF Publisher: IEEE Signal Processing Society.  Daniel Povey Arnab Ghoshal Gilles Boulianne Lukas Burget Ondrej Glembek Nagendra Goel Mirko Hannemann Petr Motlicek Yanmin Qian Petr Schwarz Jan Silovsky Georg Stemmer and Karel Vesely. 2011. The Kaldi Speech Recognition Toolkit. https:\/\/infoscience.epfl.ch\/record\/192584 Conference Name: IEEE 2011 Workshop on Automatic Speech Recognition and Understanding Number: CONF Publisher: IEEE Signal Processing Society."},{"key":"e_1_3_2_2_86_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2005.855838"},{"key":"e_1_3_2_2_87_1","doi-asserted-by":"publisher","DOI":"10.3115\/1620950.1620955"},{"key":"e_1_3_2_2_88_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2013.59"},{"key":"e_1_3_2_2_89_1","doi-asserted-by":"publisher","DOI":"10.5555\/1613984.1614006"},{"key":"e_1_3_2_2_90_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.12.002"},{"key":"e_1_3_2_2_91_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSLP.1996.607433"},{"key":"e_1_3_2_2_92_1","volume-title":"Dropout: A Simple Way to Prevent Neural Networks from Overfitting. 15, 1","author":"Srivastava Nitish","year":"2014","unstructured":"Nitish Srivastava , Geoffrey Hinton , Alex Krizhevsky , Ilya Sutskever , and Ruslan Salakhutdinov . 2014 . Dropout: A Simple Way to Prevent Neural Networks from Overfitting. 15, 1 (2014), 1929\u20131958. Nitish Srivastava, Geoffrey Hinton, Alex Krizhevsky, Ilya Sutskever, and Ruslan Salakhutdinov. 2014. Dropout: A Simple Way to Prevent Neural Networks from Overfitting. 15, 1 (2014), 1929\u20131958."},{"key":"e_1_3_2_2_93_1","doi-asserted-by":"publisher","DOI":"10.21437\/INTERSPEECH.2017-85"},{"key":"e_1_3_2_2_94_1","doi-asserted-by":"publisher","DOI":"10.1145\/3242587.3242599"},{"key":"e_1_3_2_2_95_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.95.26.15861"},{"key":"e_1_3_2_2_96_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.94.26.14965"},{"key":"e_1_3_2_2_97_1","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPA.2015.7415335"},{"key":"e_1_3_2_2_98_1","doi-asserted-by":"publisher","DOI":"10.1121\/1.428324"},{"key":"e_1_3_2_2_99_1","volume-title":"Proceedings of The 2012 Asia Pacific Signal and Information Processing Association Annual Summit and Conference. 1\u20134.","author":"Ukai Naoya","year":"2012","unstructured":"Naoya Ukai , Takumi Seko , Satoshi Tamura , and Satoru Hayamizu . 2012 . Gif-Lr: GA-Based Informative Feature for Lipreading . In Proceedings of The 2012 Asia Pacific Signal and Information Processing Association Annual Summit and Conference. 1\u20134. Naoya Ukai, Takumi Seko, Satoshi Tamura, and Satoru Hayamizu. 2012. Gif-Lr: GA-Based Informative Feature for Lipreading. In Proceedings of The 2012 Asia Pacific Signal and Information Processing Association Annual Summit and Conference. 1\u20134."},{"key":"e_1_3_2_2_100_1","volume-title":"(Dec","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani , Noam Shazeer , Niki Parmar , Jakob Uszkoreit , Llion Jones , Aidan\u00a0 N. Gomez , Lukasz Kaiser , and Illia Polosukhin . 2017. Attention Is All You Need. (Dec . 2017 ). http:\/\/arxiv.org\/abs\/1706.03762 Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N. Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention Is All You Need. (Dec. 2017). http:\/\/arxiv.org\/abs\/1706.03762"},{"key":"e_1_3_2_2_101_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390294"},{"key":"e_1_3_2_2_102_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472852"},{"key":"e_1_3_2_2_103_1","doi-asserted-by":"publisher","DOI":"10.5220\/0003169702950300"},{"key":"e_1_3_2_2_104_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2013.2261309"},{"key":"e_1_3_2_2_105_1","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2018.00118"},{"key":"e_1_3_2_2_106_1","volume-title":"Deep Retinex Decomposition for Low-Light Enhancement. (Aug","author":"Wei Chen","year":"2018","unstructured":"Chen Wei , Wenjing Wang , Wenhan Yang , and Jiaying Liu . 2018. Deep Retinex Decomposition for Low-Light Enhancement. (Aug . 2018 ). http:\/\/arxiv.org\/abs\/1808.04560 Chen Wei, Wenjing Wang, Wenhan Yang, and Jiaying Liu. 2018. Deep Retinex Decomposition for Low-Light Enhancement. (Aug. 2018). http:\/\/arxiv.org\/abs\/1808.04560"},{"key":"e_1_3_2_2_107_1","volume-title":"LCANet: End-to-end Lipreading with Cascaded Attention-CTC. In 2018 13th IEEE International Conference on Automatic Face & Gesture Recognition (FG","author":"Xu Kai","year":"2018","unstructured":"Kai Xu , Dawei Li , Nick Cassimatis , and Xiaolong Wang . 2018 . LCANet: End-to-end Lipreading with Cascaded Attention-CTC. In 2018 13th IEEE International Conference on Automatic Face & Gesture Recognition (FG 2018). IEEE, 548\u2013555. Kai Xu, Dawei Li, Nick Cassimatis, and Xiaolong Wang. 2018. LCANet: End-to-end Lipreading with Cascaded Attention-CTC. In 2018 13th IEEE International Conference on Automatic Face & Gesture Recognition (FG 2018). IEEE, 548\u2013555."},{"key":"e_1_3_2_2_108_1","volume-title":"Generalized Cross Entropy Loss for Training Deep Neural Networks with Noisy Labels. (Nov","author":"Zhang Zhilu","year":"2018","unstructured":"Zhilu Zhang and Mert\u00a0 R. Sabuncu . 2018. Generalized Cross Entropy Loss for Training Deep Neural Networks with Noisy Labels. (Nov . 2018 ). http:\/\/arxiv.org\/abs\/1805.07836 Zhilu Zhang and Mert\u00a0R. Sabuncu. 2018. Generalized Cross Entropy Loss for Training Deep Neural Networks with Noisy Labels. (Nov. 2018). http:\/\/arxiv.org\/abs\/1805.07836"},{"key":"e_1_3_2_2_109_1","volume-title":"Loss Functions for Neural Networks for Image Processing. (April","author":"Zhao Hang","year":"2018","unstructured":"Hang Zhao , Orazio Gallo , Iuri Frosio , and Jan Kautz . 2018. Loss Functions for Neural Networks for Image Processing. (April 2018 ). http:\/\/arxiv.org\/abs\/1511.08861 Hang Zhao, Orazio Gallo, Iuri Frosio, and Jan Kautz. 2018. Loss Functions for Neural Networks for Image Processing. (April 2018). http:\/\/arxiv.org\/abs\/1511.08861"},{"key":"e_1_3_2_2_110_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2005.851874"},{"key":"e_1_3_2_2_111_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2014.06.004"}],"event":{"name":"CHI '21: CHI Conference on Human Factors in Computing Systems","location":"Yokohama Japan","acronym":"CHI '21","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2021 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3411764.3445565","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3411764.3445565","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:28:49Z","timestamp":1750195729000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3411764.3445565"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,6]]},"references-count":111,"alternative-id":["10.1145\/3411764.3445565","10.1145\/3411764"],"URL":"https:\/\/doi.org\/10.1145\/3411764.3445565","relation":{},"subject":[],"published":{"date-parts":[[2021,5,6]]},"assertion":[{"value":"2021-05-07","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}