{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T16:43:57Z","timestamp":1772642637071,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":121,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,11]],"date-time":"2024-05-11T00:00:00Z","timestamp":1715385600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2239633"],"award-info":[{"award-number":["2239633"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,11]]},"DOI":"10.1145\/3613904.3642348","type":"proceedings-article","created":{"date-parts":[[2024,5,11]],"date-time":"2024-05-11T08:37:41Z","timestamp":1715416661000},"page":"1-23","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":14,"title":["MELDER: The Design and Evaluation of a Real-time Silent Speech Recognizer for Mobile Devices"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4784-8730","authenticated-orcid":false,"given":"Laxmi","family":"Pandey","sequence":"first","affiliation":[{"name":"Inclusive Interaction Lab, University of California, Merced, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8384-4764","authenticated-orcid":false,"given":"Ahmed Sabbir","family":"Arif","sequence":"additional","affiliation":[{"name":"Inclusive Interaction Lab, University of California, Merced, United States"}]}],"member":"320","published-online":{"date-parts":[[2024,5,11]]},"reference":[{"key":"e_1_3_3_3_1_1","doi-asserted-by":"publisher","unstructured":"Mart\u00edn Abadi Ashish Agarwal Paul Barham Eugene Brevdo Zhifeng Chen Craig Citro Greg\u00a0S. Corrado Andy Davis Jeffrey Dean Matthieu Devin Sanjay Ghemawat Ian Goodfellow Andrew Harp Geoffrey Irving Michael Isard Yangqing Jia Rafal Jozefowicz Lukasz Kaiser Manjunath Kudlur Josh Levenberg Dan Mane Rajat Monga Sherry Moore Derek Murray Chris Olah Mike Schuster Jonathon Shlens Benoit Steiner Ilya Sutskever Kunal Talwar Paul Tucker Vincent Vanhoucke Vijay Vasudevan Fernanda Viegas Oriol Vinyals Pete Warden Martin Wattenberg Martin Wicke Yuan Yu and Xiaoqiang Zheng. 2016. TensorFlow: Large-Scale Machine Learning on Heterogeneous Distributed Systems. https:\/\/doi.org\/10.48550\/arXiv.1603.04467 arXiv:1603.04467 [cs].","DOI":"10.48550\/arXiv.1603.04467"},{"key":"e_1_3_3_3_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2889052"},{"key":"e_1_3_3_3_3_1","doi-asserted-by":"publisher","unstructured":"Triantafyllos Afouras Joon\u00a0Son Chung and Andrew Zisserman. 2018. Deep Lip Reading: A Comparison of Models and an Online Application. https:\/\/doi.org\/10.48550\/arXiv.1806.06053 arXiv:1806.06053 [cs].","DOI":"10.48550\/arXiv.1806.06053"},{"key":"e_1_3_3_3_4_1","doi-asserted-by":"publisher","unstructured":"Triantafyllos Afouras Joon\u00a0Son Chung and Andrew Zisserman. 2018. LRS3-TED: A Large-Scale Dataset for Visual Speech Recognition. https:\/\/doi.org\/10.48550\/arXiv.1809.00496 arXiv:1809.00496 [cs].","DOI":"10.48550\/arXiv.1809.00496"},{"key":"e_1_3_3_3_5_1","doi-asserted-by":"publisher","unstructured":"Abien\u00a0Fred Agarap. 2019. Deep Learning using Rectified Linear Units (ReLU). https:\/\/doi.org\/10.48550\/arXiv.1803.08375 arXiv:1803.08375 [cs stat].","DOI":"10.48550\/arXiv.1803.08375"},{"key":"e_1_3_3_3_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472172"},{"key":"e_1_3_3_3_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447404.3447410"},{"key":"e_1_3_3_3_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIC-STH.2009.5444533"},{"key":"e_1_3_3_3_9_1","volume-title":"LipNet: End-to-End Sentence-level Lipreading. arXiv:1611.01599 [cs] (Dec","author":"Assael M.","year":"2016","unstructured":"Yannis\u00a0M. Assael, Brendan Shillingford, Shimon Whiteson, and Nando de Freitas. 2016. LipNet: End-to-End Sentence-level Lipreading. arXiv:1611.01599 [cs] (Dec. 2016). http:\/\/arxiv.org\/abs\/1611.01599 arXiv:1611.01599."},{"key":"e_1_3_3_3_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jneumeth.2008.06.030"},{"key":"e_1_3_3_3_11_1","doi-asserted-by":"publisher","DOI":"10.3390\/app9183870"},{"key":"e_1_3_3_3_12_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0170531"},{"key":"e_1_3_3_3_13_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2010.01.001"},{"key":"e_1_3_3_3_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3379337.3415879"},{"key":"e_1_3_3_3_15_1","doi-asserted-by":"publisher","unstructured":"Junyoung Chung Caglar Gulcehre KyungHyun Cho and Yoshua Bengio. 2014. Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling. https:\/\/doi.org\/10.48550\/arXiv.1412.3555 arXiv:1412.3555 [cs].","DOI":"10.48550\/arXiv.1412.3555"},{"key":"e_1_3_3_3_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-54184-6_6"},{"key":"e_1_3_3_3_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-54427-4_19"},{"key":"e_1_3_3_3_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2018.02.001"},{"key":"e_1_3_3_3_19_1","doi-asserted-by":"publisher","DOI":"10.4324\/9780203771587"},{"key":"e_1_3_3_3_20_1","first-page":"2640","volume-title":"Proceedings of the 36th International Conference on Machine Learning. PMLR, 1341\u20131350","author":"Collobert Ronan","year":"2019","unstructured":"Ronan Collobert, Awni Hannun, and Gabriel Synnaeve. 2019. A Fully Differentiable Beam Search Decoder. In Proceedings of the 36th International Conference on Machine Learning. PMLR, 1341\u20131350. https:\/\/proceedings.mlr.press\/v97\/collobert19a.html ISSN: 2640-3498."},{"key":"e_1_3_3_3_21_1","doi-asserted-by":"publisher","DOI":"10.1121\/1.2229005"},{"key":"e_1_3_3_3_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/1592700.1592731"},{"key":"e_1_3_3_3_23_1","unstructured":"Mark Davies. 2022. N-Grams Based on 520 Million Word COCA Corpus. https:\/\/www.ngrams.info\/coca2020.asp"},{"key":"e_1_3_3_3_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2006.1660033"},{"key":"e_1_3_3_3_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2004.1326078"},{"key":"e_1_3_3_3_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-07863-2_32"},{"key":"e_1_3_3_3_27_1","doi-asserted-by":"publisher","DOI":"10.1080\/10447318.2014.986642"},{"key":"e_1_3_3_3_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.medengphy.2007.05.003"},{"key":"e_1_3_3_3_29_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2010-195"},{"key":"e_1_3_3_3_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3242587.3242603"},{"key":"e_1_3_3_3_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411830"},{"key":"e_1_3_3_3_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.medengphy.2010.08.011"},{"key":"e_1_3_3_3_33_1","unstructured":"Alexandre Gonfalonieri. 2018. How Amazon Alexa Works? Your Guide to Natural Language Processing (AI). https:\/\/towardsdatascience.com\/how-amazon-alexa-works-your-guide-to-natural-language-processing-ai-7506004709d3"},{"key":"e_1_3_3_3_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2004.1326155"},{"key":"e_1_3_3_3_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"e_1_3_3_3_36_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2005.08100"},{"key":"e_1_3_3_3_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_3_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5946965"},{"key":"e_1_3_3_3_39_1","doi-asserted-by":"publisher","DOI":"10.1155\/2007"},{"key":"e_1_3_3_3_40_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.12.001"},{"key":"e_1_3_3_3_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3526114.3558715"},{"key":"e_1_3_3_3_42_1","doi-asserted-by":"crossref","unstructured":"Jie Hu Li Shen and Gang Sun. 2018. Squeeze-and-Excitation Networks. 7132\u20137141. https:\/\/openaccess.thecvf.com\/content_cvpr_2018\/html\/Hu_Squeeze-and-Excitation_Networks_CVPR_2018_paper.html","DOI":"10.1109\/CVPR.2018.00745"},{"key":"e_1_3_3_3_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2007.366140"},{"key":"e_1_3_3_3_44_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.11.004"},{"key":"e_1_3_3_3_45_1","doi-asserted-by":"publisher","DOI":"10.5057\/isase.2023-C000013"},{"key":"e_1_3_3_3_46_1","volume-title":"Proceedings of the 32nd International Conference on Machine Learning. PMLR, 448\u2013456","author":"Ioffe Sergey","year":"2015","unstructured":"Sergey Ioffe and Christian Szegedy. 2015. Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. In Proceedings of the 32nd International Conference on Machine Learning. PMLR, 448\u2013456. https:\/\/proceedings.mlr.press\/v37\/ioffe15.html ISSN: 1938-7228."},{"key":"e_1_3_3_3_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534613"},{"key":"e_1_3_3_3_48_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.11.003"},{"key":"e_1_3_3_3_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2003.1224072"},{"key":"e_1_3_3_3_50_1","unstructured":"Szu-Chen Jou Tanja Schultz Matthias Walliczek Florian Kraft and Alex Waibel. 2006. Towards Continuous Speech Recognition Using Surface Electromyography. (2006)."},{"key":"e_1_3_3_3_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3172944.3172977"},{"key":"e_1_3_3_3_52_1","first-page":"2640","volume-title":"Proceedings of the Machine Learning for Health NeurIPS Workshop. PMLR, 25\u201338","author":"Kapur Arnav","year":"2020","unstructured":"Arnav Kapur, Utkarsh Sarawgi, Eric Wadkins, Matthew Wu, Nora Hollenstein, and Pattie Maes. 2020. Non-Invasive Silent Speech Recognition in Multiple Sclerosis with Dysphonia. In Proceedings of the Machine Learning for Health NeurIPS Workshop. PMLR, 25\u201338. https:\/\/proceedings.mlr.press\/v116\/kapur20a.html ISSN: 2640-3498."},{"key":"e_1_3_3_3_53_1","doi-asserted-by":"publisher","unstructured":"Sara Kashiwagi Keitaro Tanaka Qi Feng and Shigeo Morishima. 2023. Improving the Gap in Visual Speech Recognition Between Normal and Silent Speech Based on Metric Learning. https:\/\/doi.org\/10.48550\/arXiv.2305.14203 arXiv:2305.14203 [cs eess].","DOI":"10.48550\/arXiv.2305.14203"},{"key":"e_1_3_3_3_54_1","unstructured":"Pavel Khlebovich. 2023. IP Webcam - Apps on Google Play. https:\/\/play.google.com\/store\/apps\/details?id=com.pas.webcam&hl=en_US"},{"key":"e_1_3_3_3_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3502015"},{"key":"e_1_3_3_3_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3399715.3399852"},{"key":"e_1_3_3_3_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300376"},{"key":"e_1_3_3_3_58_1","doi-asserted-by":"publisher","DOI":"10.5555\/1577069.1755843"},{"key":"e_1_3_3_3_59_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1412.6980"},{"key":"e_1_3_3_3_60_1","doi-asserted-by":"crossref","unstructured":"Oscar Koller Hermann Ney and Richard Bowden. 2015. Deep Learning of Mouth Shapes for Sign Language. 85\u201391. https:\/\/www.cv-foundation.org\/openaccess\/content_iccv_2015_workshops\/w12\/html\/Koller_Deep_Learning_of_ICCV_2015_paper.html","DOI":"10.1109\/ICCVW.2015.69"},{"key":"e_1_3_3_3_61_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijcce.2022.01.003"},{"key":"e_1_3_3_3_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3519391.3519399"},{"key":"e_1_3_3_3_63_1","volume-title":"Soviet physics doklady, Vol.\u00a010. MAIK Nauka\/Interperiodica","author":"Levenshtein I","unstructured":"Vladimir\u00a0I Levenshtein and others. 1966. Binary Codes Capable of Correcting Deletions, Insertions, and Reversals. In Soviet physics doklady, Vol.\u00a010. MAIK Nauka\/Interperiodica, Soviet Union, 707\u2013710. Issue: 8."},{"key":"e_1_3_3_3_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/3311823.3311831"},{"key":"e_1_3_3_3_65_1","first-page":"541","article-title":"Communication Strategies for Nurses Interacting with Patients Who Are Deaf","volume":"19","author":"Lieu Christine","year":"2007","unstructured":"Christine Chong-hee Lieu, Georgia\u00a0Robins Sadler, Judith\u00a0T Fullerton, and Paulette\u00a0Deyo Stohlmann. 2007. Communication Strategies for Nurses Interacting with Patients Who Are Deaf. Dermatology Nursing 19, 6 (2007), 541. Publisher: Anthony J. Jannetti, Inc..","journal-title":"Dermatology Nursing"},{"key":"e_1_3_3_3_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485730.3485945"},{"key":"e_1_3_3_3_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2007.366989"},{"key":"e_1_3_3_3_68_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1"},{"key":"e_1_3_3_3_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/765891.765971"},{"key":"e_1_3_3_3_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2005.1566521"},{"key":"e_1_3_3_3_71_1","first-page":"13","article-title":"Tables of Single-Letter and Digram Frequency Counts for Various Word-Length and Letter-Position Combinations","volume":"1","author":"Mayzner S.","year":"1965","unstructured":"M.\u00a0S. Mayzner and M.\u00a0E. Tresselt. 1965. Tables of Single-Letter and Digram Frequency Counts for Various Word-Length and Letter-Position Combinations. Psychonomic Monograph Supplements 1, 2 (1965), 13\u201332.","journal-title":"Psychonomic Monograph Supplements"},{"key":"e_1_3_3_3_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2003.1200069"},{"key":"e_1_3_3_3_73_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2000.861925"},{"key":"e_1_3_3_3_74_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2014-293"},{"key":"e_1_3_3_3_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2011.6130355"},{"key":"e_1_3_3_3_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2009.191"},{"key":"e_1_3_3_3_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445565"},{"key":"e_1_3_3_3_78_1","volume-title":"Silent Speech and Emotion Recognition from Vocal Tract Shape Dynamics in Real-Time MRI. arXiv:2106.08706 [cs, eess] (June","author":"Pandey Laxmi","year":"2021","unstructured":"Laxmi Pandey and Ahmed\u00a0Sabbir Arif. 2021. Silent Speech and Emotion Recognition from Vocal Tract Shape Dynamics in Real-Time MRI. arXiv:2106.08706 [cs, eess] (June 2021). http:\/\/arxiv.org\/abs\/2106.08706 arXiv:2106.08706."},{"key":"e_1_3_3_3_79_1","doi-asserted-by":"publisher","DOI":"10.1145\/3567723"},{"key":"e_1_3_3_3_80_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491101.3519611"},{"key":"e_1_3_3_3_81_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445430"},{"key":"e_1_3_3_3_82_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2008.2011515"},{"key":"e_1_3_3_3_83_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.11.006"},{"key":"e_1_3_3_3_84_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472088"},{"key":"e_1_3_3_3_85_1","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639643"},{"key":"e_1_3_3_3_86_1","doi-asserted-by":"publisher","unstructured":"Anne Porbadnigk Marek Wester Jan Calliess and Tanja Schultz. 2009. EEG-Based Speech Recognition - Impact of Temporal Effects Vol.\u00a01. SCITEPRESS 376\u2013381. https:\/\/doi.org\/10.5220\/0001554303760381","DOI":"10.5220\/0001554303760381"},{"key":"e_1_3_3_3_87_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2003.817150"},{"key":"e_1_3_3_3_88_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2005.855838"},{"key":"e_1_3_3_3_89_1","doi-asserted-by":"publisher","DOI":"10.1145\/3161187"},{"key":"e_1_3_3_3_90_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvoice.2004.10.007"},{"key":"e_1_3_3_3_91_1","doi-asserted-by":"crossref","unstructured":"Christos Sagonas Georgios Tzimiropoulos Stefanos Zafeiriou and Maja Pantic. 2013. 300 Faces in-the-Wild Challenge: The First Facial Landmark Localization Challenge. 397\u2013403. https:\/\/www.cv-foundation.org\/openaccess\/content_iccv_workshops_2013\/W11\/html\/Sagonas_300_Faces_in-the-Wild_2013_ICCV_paper.html","DOI":"10.1109\/ICCVW.2013.59"},{"key":"e_1_3_3_3_92_1","volume-title":"Situationally-Induced Impairments and Disabilities Research. arXiv:1904.06128 [cs] (April","author":"Sarsenbayeva Zhanna","year":"2019","unstructured":"Zhanna Sarsenbayeva, Vassilis Kostakos, and Jorge Goncalves. 2019. Situationally-Induced Impairments and Disabilities Research. arXiv:1904.06128 [cs] (April 2019). http:\/\/arxiv.org\/abs\/1904.06128 arXiv:1904.06128."},{"key":"e_1_3_3_3_93_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.12.002"},{"key":"e_1_3_3_3_94_1","doi-asserted-by":"publisher","unstructured":"Brendan Shillingford Yannis Assael Matthew\u00a0W. Hoffman Thomas Paine C\u00edan Hughes Utsav Prabhu Hank Liao Hasim Sak Kanishka Rao Lorrayne Bennett Marie Mulville Ben Coppin Ben Laurie Andrew Senior and Nando de Freitas. 2018. Large-Scale Visual Speech Recognition. https:\/\/doi.org\/10.48550\/arXiv.1807.05162 arXiv:1807.05162 [cs].","DOI":"10.48550\/arXiv.1807.05162"},{"key":"e_1_3_3_3_95_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2023.103738"},{"key":"e_1_3_3_3_96_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46349-0_5"},{"key":"e_1_3_3_3_97_1","doi-asserted-by":"crossref","unstructured":"Joon Son\u00a0Chung Andrew Senior Oriol Vinyals and Andrew Zisserman. 2017. Lip Reading Sentences in the Wild. 6447\u20136456. https:\/\/openaccess.thecvf.com\/content_cvpr_2017\/html\/Chung_Lip_Reading_Sentences_CVPR_2017_paper.html","DOI":"10.1109\/CVPR.2017.367"},{"key":"e_1_3_3_3_98_1","volume-title":"Dropout: A Simple Way to Prevent Neural Networks from Overfitting. The journal of machine learning research 15, 1","author":"Srivastava Nitish","year":"2014","unstructured":"Nitish Srivastava, Geoffrey Hinton, Alex Krizhevsky, Ilya Sutskever, and Ruslan Salakhutdinov. 2014. Dropout: A Simple Way to Prevent Neural Networks from Overfitting. The journal of machine learning research 15, 1 (2014), 1929\u20131958. Publisher: JMLR. org."},{"key":"e_1_3_3_3_99_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550281"},{"key":"e_1_3_3_3_100_1","doi-asserted-by":"publisher","unstructured":"Themos Stafylakis and Georgios Tzimiropoulos. 2017. Combining Residual Networks with LSTMs for Lipreading. https:\/\/doi.org\/10.48550\/arXiv.1703.04105 arXiv:1703.04105 [cs].","DOI":"10.48550\/arXiv.1703.04105"},{"key":"e_1_3_3_3_101_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581465"},{"key":"e_1_3_3_3_102_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448018.3458011"},{"key":"e_1_3_3_3_103_1","doi-asserted-by":"publisher","DOI":"10.1145\/3242587.3242599"},{"key":"e_1_3_3_3_104_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.95.26.15861"},{"key":"e_1_3_3_3_105_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.94.26.14965"},{"key":"e_1_3_3_3_106_1","doi-asserted-by":"publisher","unstructured":"Satoshi Tamura Hiroshi Ninomiya Norihide Kitaoka Shin Osuga Yurie Iribe Kazuya Takeda and Satoru Hayamizu. 2015. Audio-Visual Speech Recognition Using Deep Bottleneck Features and High-Performance Lipreading. In 2015 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA). 575\u2013582. https:\/\/doi.org\/10.1109\/APSIPA.2015.7415335","DOI":"10.1109\/APSIPA.2015.7415335"},{"key":"e_1_3_3_3_107_1","doi-asserted-by":"publisher","DOI":"10.1121\/1.428324"},{"key":"e_1_3_3_3_108_1","volume-title":"Proceedings of The 2012 Asia Pacific Signal and Information Processing Association Annual Summit and Conference. 1\u20134.","author":"Ukai Naoya","year":"2012","unstructured":"Naoya Ukai, Takumi Seko, Satoshi Tamura, and Satoru Hayamizu. 2012. GIF-LR:GA-Based Informative Feature for Lipreading. In Proceedings of The 2012 Asia Pacific Signal and Information Processing Association Annual Summit and Conference. 1\u20134."},{"key":"e_1_3_3_3_109_1","volume-title":"Advances in Neural Information Processing Systems, Vol.\u00a030. Curran Associates","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems, Vol.\u00a030. Curran Associates, Inc.https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/hash\/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html"},{"key":"e_1_3_3_3_110_1","doi-asserted-by":"publisher","DOI":"10.1145\/2037373.2037418"},{"key":"e_1_3_3_3_111_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472852"},{"key":"e_1_3_3_3_112_1","doi-asserted-by":"publisher","DOI":"10.5220\/0003169702950300"},{"key":"e_1_3_3_3_113_1","unstructured":"Wikipedia. 2022. Ablation (artificial Intelligence). https:\/\/en.wikipedia.org\/w\/index.php?title=Ablation_(artificial_intelligence)&oldid=1097614343 Page Version ID: 1097614343."},{"key":"e_1_3_3_3_114_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307334.3326073"},{"key":"e_1_3_3_3_115_1","volume-title":"Advances in Neural Information Processing Systems, Vol.\u00a027. Curran Associates","author":"Yosinski Jason","year":"2014","unstructured":"Jason Yosinski, Jeff Clune, Yoshua Bengio, and Hod Lipson. 2014. How Transferable Are Features in Deep Neural Networks?. In Advances in Neural Information Processing Systems, Vol.\u00a027. Curran Associates, Inc.https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2014\/hash\/375c71349b295fbe2dcdca9206f20a06-Abstract.html"},{"key":"e_1_3_3_3_116_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580838"},{"key":"e_1_3_3_3_117_1","doi-asserted-by":"publisher","DOI":"10.1145\/3494987"},{"key":"e_1_3_3_3_118_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580801"},{"key":"e_1_3_3_3_119_1","doi-asserted-by":"crossref","unstructured":"Xingxuan Zhang Feng Cheng and Shilin Wang. 2019. Spatio-Temporal Fusion Based Convolutional Sequence Learning for Lip Reading. 713\u2013722. https:\/\/openaccess.thecvf.com\/content_ICCV_2019\/html\/Zhang_Spatio-Temporal_Fusion_Based_Convolutional_Sequence_Learning_for_Lip_Reading_ICCV_2019_paper.html","DOI":"10.1109\/ICCV.2019.00080"},{"key":"e_1_3_3_3_120_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2014.06.004"},{"key":"e_1_3_3_3_121_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2020.3004555"}],"event":{"name":"CHI '24: CHI Conference on Human Factors in Computing Systems","location":"Honolulu HI USA","acronym":"CHI '24","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGACCESS ACM Special Interest Group on Accessible Computing"]},"container-title":["Proceedings of the CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3613904.3642348","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3613904.3642348","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T23:44:25Z","timestamp":1750290265000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3613904.3642348"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,11]]},"references-count":121,"alternative-id":["10.1145\/3613904.3642348","10.1145\/3613904"],"URL":"https:\/\/doi.org\/10.1145\/3613904.3642348","relation":{},"subject":[],"published":{"date-parts":[[2024,5,11]]},"assertion":[{"value":"2024-05-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}