{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T21:20:02Z","timestamp":1776115202754,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":74,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,19]],"date-time":"2023-04-19T00:00:00Z","timestamp":1681862400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,19]]},"DOI":"10.1145\/3544548.3580801","type":"proceedings-article","created":{"date-parts":[[2023,4,20]],"date-time":"2023-04-20T04:28:44Z","timestamp":1681964924000},"page":"1-18","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":50,"title":["EchoSpeech: Continuous Silent Speech Recognition on Minimally-obtrusive Eyewear Powered by Acoustic Sensing"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8329-0522","authenticated-orcid":false,"given":"Ruidong","family":"Zhang","sequence":"first","affiliation":[{"name":"Information Science, Cornell University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4208-7904","authenticated-orcid":false,"given":"Ke","family":"Li","sequence":"additional","affiliation":[{"name":"Information Science, Cornell University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7047-3917","authenticated-orcid":false,"given":"Yihong","family":"Hao","sequence":"additional","affiliation":[{"name":"Computer Science, Cornell University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8609-7066","authenticated-orcid":false,"given":"Yufan","family":"Wang","sequence":"additional","affiliation":[{"name":"Information Science, Cornell University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8933-7296","authenticated-orcid":false,"given":"Zhengnan","family":"Lai","sequence":"additional","affiliation":[{"name":"Information Science, Cornell University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5510-6799","authenticated-orcid":false,"given":"Fran\u00e7ois","family":"Guimbreti\u00e8re","sequence":"additional","affiliation":[{"name":"Information Science, Cornell University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5079-5927","authenticated-orcid":false,"given":"Cheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Information Science, Cornell University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,4,19]]},"reference":[{"key":"e_1_3_3_3_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461856"},{"key":"e_1_3_3_3_2_1","doi-asserted-by":"publisher","DOI":"10.1121\/1.5042758"},{"key":"e_1_3_3_3_3_1","unstructured":"Yannis\u00a0M. Assael Brendan Shillingford Shimon Whiteson and Nando de Freitas. 2016. LipNet: Sentence-level Lipreading. CoRR abs\/1611.01599(2016). arXiv:1611.01599http:\/\/arxiv.org\/abs\/1611.01599"},{"key":"e_1_3_3_3_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2015.310"},{"key":"e_1_3_3_3_5_1","unstructured":"Linnar Billman and Johan Hullberg. 2018. Speech Reading with Deep Neural Networks."},{"key":"e_1_3_3_3_6_1","doi-asserted-by":"publisher","DOI":"10.5220\/0006573200560062"},{"key":"e_1_3_3_3_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3379337.3415879"},{"key":"e_1_3_3_3_8_1","volume-title":"British Machine Vision Conference","author":"Chung J","year":"2017","unstructured":"J Chung and A Zisserman. 2017. Lip reading in profile. British Machine Vision Conference, 2017(2017)."},{"key":"e_1_3_3_3_9_1","volume-title":"Computer Vision \u2013 ACCV","author":"Chung Joon\u00a0Son","year":"2016","unstructured":"Joon\u00a0Son Chung and Andrew Zisserman. 2017. Lip Reading in the Wild. In Computer Vision \u2013 ACCV 2016, Shang-Hong Lai, Vincent Lepetit, Ko\u00a0Nishino, and Yoichi Sato (Eds.). Springer International Publishing, Cham, 87\u2013103."},{"key":"e_1_3_3_3_10_1","doi-asserted-by":"publisher","DOI":"10.1121\/1.2229005"},{"key":"e_1_3_3_3_11_1","doi-asserted-by":"crossref","unstructured":"Thomas\u00a0Le Cornu and Ben Milner. 2015. Reconstructing intelligible audio speech from visual speech features. In sixteenth annual conference of the international speech communication association.","DOI":"10.21437\/Interspeech.2015-139"},{"key":"e_1_3_3_3_12_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-939"},{"key":"e_1_3_3_3_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2006.1660033"},{"key":"e_1_3_3_3_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953127"},{"key":"e_1_3_3_3_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462280"},{"key":"e_1_3_3_3_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411830"},{"key":"e_1_3_3_3_17_1","volume-title":"Lip reading using CNN and LSTM. Technical report","author":"Garg Amit","year":"2016","unstructured":"Amit Garg, Jonathan Noyola, and Sameep Bagadia. 2016. Lip reading using CNN and LSTM. Technical report, Stanford University, CS231 n project report (2016)."},{"key":"e_1_3_3_3_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2757263"},{"key":"e_1_3_3_3_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2015.2407694"},{"key":"e_1_3_3_3_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458709.3458985"},{"key":"e_1_3_3_3_21_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2012.02.001"},{"key":"e_1_3_3_3_22_1","first-page":"253","article-title":"A review of current ultrasound exposure limits","volume":"21","author":"Howard Q","year":"2005","unstructured":"Carl\u00a0Q Howard, Colin\u00a0H Hansen, and Anthony\u00a0C Zander. 2005. A review of current ultrasound exposure limits. The Journal of Occupational Health and Safety of Australia and New Zealand 21, 3(2005), 253\u2013257.","journal-title":"The Journal of Occupational Health and Safety of Australia and New Zealand"},{"key":"e_1_3_3_3_23_1","doi-asserted-by":"publisher","unstructured":"Yuya Igarashi Kyosuke Futami and Kazuya Murao. 2022. Silent Speech Eyewear Interface: Silent Speech Recognition Method Using Eyewear with Infrared Distance Sensors. (2022) 33\u201338. https:\/\/doi.org\/10.1145\/3544794.3558458","DOI":"10.1145\/3544794.3558458"},{"key":"e_1_3_3_3_24_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2018.02.002"},{"key":"e_1_3_3_3_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534613"},{"key":"e_1_3_3_3_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3463519"},{"key":"e_1_3_3_3_27_1","volume-title":"Ultrasound-Based Silent Speech Interface Using Convolutional and Recurrent Neural Networks. Acta Acustica united with Acustica 105, 4","author":"Juanpere Eloi\u00a0Moliner","year":"2019","unstructured":"Eloi\u00a0Moliner Juanpere and Tam\u00e1s\u00a0G\u00e1bor Csap\u00f3. 2019. Ultrasound-Based Silent Speech Interface Using Convolutional and Recurrent Neural Networks. Acta Acustica united with Acustica 105, 4 (2019), 587\u2013590."},{"key":"e_1_3_3_3_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3172944.3172977"},{"key":"e_1_3_3_3_29_1","volume-title":"Proceedings of the Machine Learning for Health NeurIPS Workshop(Proceedings of Machine Learning Research, Vol.\u00a0116)","author":"Kapur Arnav","year":"2020","unstructured":"Arnav Kapur, Utkarsh Sarawgi, Eric Wadkins, Matthew Wu, Nora Hollenstein, and Pattie Maes. 2020. Non-Invasive Silent Speech Recognition in Multiple Sclerosis with Dysphonia. In Proceedings of the Machine Learning for Health NeurIPS Workshop(Proceedings of Machine Learning Research, Vol.\u00a0116), Adrian\u00a0V. Dalca, Matthew\u00a0B.A. McDermott, Emily Alsentzer, Samuel\u00a0G. Finlayson, Michael Oberst, Fabian Falck, and Brett Beaulieu-Jones (Eds.). PMLR, 25\u201338. https:\/\/proceedings.mlr.press\/v116\/kapur20a.html"},{"key":"e_1_3_3_3_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2758999"},{"key":"e_1_3_3_3_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/BIOCAS.2018.8584786"},{"key":"e_1_3_3_3_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411763.3451552"},{"key":"e_1_3_3_3_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3502015"},{"key":"e_1_3_3_3_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3399715.3399852"},{"key":"e_1_3_3_3_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300376"},{"key":"e_1_3_3_3_36_1","doi-asserted-by":"publisher","DOI":"10.21437\/AVSP.2017-13"},{"key":"e_1_3_3_3_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3519391.3519399"},{"key":"e_1_3_3_3_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534621"},{"key":"e_1_3_3_3_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3311823.3311831"},{"key":"e_1_3_3_3_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383622"},{"key":"e_1_3_3_3_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/765891.765996"},{"key":"e_1_3_3_3_42_1","doi-asserted-by":"publisher","DOI":"10.1088\/1741-2552"},{"key":"e_1_3_3_3_43_1","doi-asserted-by":"publisher","unstructured":"Daniel Michelsanti Olga Slizovskaia Gloria Haro Emilia G\u00f3mez Zheng-Hua Tan and Jesper Jensen. 2020. Vocoder-Based Speech Synthesis from Silent Videos. (2020). https:\/\/doi.org\/10.48550\/ARXIV.2004.02541","DOI":"10.48550\/ARXIV.2004.02541"},{"key":"e_1_3_3_3_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2022.3162495"},{"key":"e_1_3_3_3_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445565"},{"key":"e_1_3_3_3_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445430"},{"key":"e_1_3_3_3_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3450618.3469176"},{"key":"e_1_3_3_3_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2002.5745028"},{"key":"e_1_3_3_3_49_1","volume-title":"A New Visual Speech Recognition Approach for RGB-D Cameras","author":"Rekik Ahmed","unstructured":"Ahmed Rekik, Achraf Ben-Hamadou, and Walid Mahdi. 2014. A New Visual Speech Recognition Approach for RGB-D Cameras. In Image Analysis and Recognition, Aur\u00e9lio Campilho and Mohamed Kamel (Eds.). Springer International Publishing, Cham, 21\u201328."},{"key":"e_1_3_3_3_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458709.3458941"},{"key":"e_1_3_3_3_51_1","unstructured":"Christine Rzepka. 2019. Examining the use of voice assistants: A value-focused thinking approach. (2019)."},{"key":"e_1_3_3_3_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/2634317.2634322"},{"key":"e_1_3_3_3_53_1","doi-asserted-by":"publisher","unstructured":"George Saon Gakuto Kurata Tom Sercu Kartik Audhkhasi Samuel Thomas Dimitrios Dimitriadis Xiaodong Cui Bhuvana Ramabhadran Michael Picheny Lynn-Li Lim Bergul Roomi and Phil Hall. 2017. English Conversational Telephone Speech Recognition by Humans and Machines. (2017). https:\/\/doi.org\/10.48550\/ARXIV.1703.02136","DOI":"10.48550\/ARXIV.1703.02136"},{"key":"e_1_3_3_3_54_1","volume-title":"Computers Helping People with Special Needs","author":"Schultz Tanja","unstructured":"Tanja Schultz. 2010. ICCHP Keynote: Recognizing Silent and Weak Speech Based on Electromyography. In Computers Helping People with Special Needs, Klaus Miesenberger, Joachim Klaus, Wolfgang Zagler, and Arthur Karshmer (Eds.). Springer Berlin Heidelberg, Berlin, Heidelberg, 595\u2013604."},{"key":"e_1_3_3_3_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550281"},{"key":"e_1_3_3_3_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3242587.3242599"},{"key":"e_1_3_3_3_57_1","volume-title":"Multimodal Pattern Recognition of Social Signals in Human-Computer-Interaction","author":"Thanda Abhinav","unstructured":"Abhinav Thanda and Shankar\u00a0M. Venkatesan. 2017. Audio Visual Speech Recognition Using Deep Recurrent Neural Networks. In Multimodal Pattern Recognition of Social Signals in Human-Computer-Interaction, Friedhelm Schwenker and Stefan Scherer (Eds.). Springer International Publishing, Cham, 98\u2013109."},{"key":"e_1_3_3_3_58_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1078"},{"key":"e_1_3_3_3_59_1","doi-asserted-by":"publisher","DOI":"10.3390\/s21206744"},{"key":"e_1_3_3_3_60_1","doi-asserted-by":"publisher","unstructured":"Konstantinos Vougioukas Pingchuan Ma Stavros Petridis and Maja Pantic. 2019. Video-Driven Speech Reconstruction using Generative Adversarial Networks. (2019). https:\/\/doi.org\/10.48550\/ARXIV.1906.06301","DOI":"10.48550\/ARXIV.1906.06301"},{"key":"e_1_3_3_3_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472852"},{"key":"e_1_3_3_3_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3369812"},{"key":"e_1_3_3_3_63_1","doi-asserted-by":"publisher","DOI":"10.3390\/brainsci10070442"},{"key":"e_1_3_3_3_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534606"},{"key":"e_1_3_3_3_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404983.3405513"},{"key":"e_1_3_3_3_66_1","doi-asserted-by":"publisher","unstructured":"W. Xiong J. Droppo X. Huang F. Seide M. Seltzer A. Stolcke D. Yu and G. Zweig. 2016. Achieving Human Parity in Conversational Speech Recognition. (2016). https:\/\/doi.org\/10.48550\/ARXIV.1610.05256","DOI":"10.48550\/ARXIV.1610.05256"},{"key":"e_1_3_3_3_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2018.00088"},{"key":"e_1_3_3_3_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350596"},{"key":"e_1_3_3_3_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2019.8756582"},{"key":"e_1_3_3_3_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448087"},{"key":"e_1_3_3_3_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/3494987"},{"key":"e_1_3_3_3_72_1","doi-asserted-by":"publisher","unstructured":"Yongzhao Zhang Yi-Chao Chen Haonan Wang and Xingyu Jin. 2021. CELIP: Ultrasonic-Based Lip Reading with Channel Estimation Approach for Virtual Reality Systems. In Adjunct Proceedings of the 2021 ACM International Joint Conference on Pervasive and Ubiquitous Computing and Proceedings of the 2021 ACM International Symposium on Wearable Computers (Virtual USA) (UbiComp \u201921). Association for Computing Machinery New York NY USA 580\u2013585. https:\/\/doi.org\/10.1145\/3460418.3480163","DOI":"10.1145\/3460418.3480163"},{"key":"e_1_3_3_3_73_1","doi-asserted-by":"publisher","DOI":"10.1145\/3381008"},{"key":"e_1_3_3_3_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2009.2030637"}],"event":{"name":"CHI '23: CHI Conference on Human Factors in Computing Systems","location":"Hamburg Germany","acronym":"CHI '23","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2023 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3544548.3580801","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3544548.3580801","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:47:16Z","timestamp":1750178836000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3544548.3580801"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,19]]},"references-count":74,"alternative-id":["10.1145\/3544548.3580801","10.1145\/3544548"],"URL":"https:\/\/doi.org\/10.1145\/3544548.3580801","relation":{},"subject":[],"published":{"date-parts":[[2023,4,19]]},"assertion":[{"value":"2023-04-19","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}