{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,19]],"date-time":"2026-06-19T13:32:38Z","timestamp":1781875958762,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T00:00:00Z","timestamp":1745539200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000002","name":"NIH (National Institutes of Health)","doi-asserted-by":"publisher","award":["R01EY03568801,R01EY03008503"],"award-info":[{"award-number":["R01EY03568801,R01EY03008503"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2113485"],"award-info":[{"award-number":["2113485"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,26]]},"DOI":"10.1145\/3706598.3713376","type":"proceedings-article","created":{"date-parts":[[2025,4,24]],"date-time":"2025-04-24T04:24:56Z","timestamp":1745468696000},"page":"1-17","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Tap&amp;Say: Touch Location-Informed Large Language Model for Multimodal Text Correction on Smartphones"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2628-2523","authenticated-orcid":false,"given":"Maozheng","family":"Zhao","sequence":"first","affiliation":[{"name":"Department of Computer Science, Stony Brook University, Stony Brook, New York, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5695-2869","authenticated-orcid":false,"given":"Michael Xuelin","family":"Huang","sequence":"additional","affiliation":[{"name":"Google, Mountain View, California, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1676-968X","authenticated-orcid":false,"given":"Nathan G","family":"Huang","sequence":"additional","affiliation":[{"name":"Westlake High School, Austin, Texas, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4514-1715","authenticated-orcid":false,"given":"Shanqing","family":"Cai","sequence":"additional","affiliation":[{"name":"Google, Mountain View, California, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9231-7633","authenticated-orcid":false,"given":"Henry","family":"Huang","sequence":"additional","affiliation":[{"name":"Harvard University, Cambridge, Massachusetts, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6550-163X","authenticated-orcid":false,"given":"Michael G","family":"Huang","sequence":"additional","affiliation":[{"name":"University of Texas at Austin, Austin, Texas, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0752-2090","authenticated-orcid":false,"given":"Shumin","family":"Zhai","sequence":"additional","affiliation":[{"name":"Google, Mountain View, California, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1768-7043","authenticated-orcid":false,"given":"IV","family":"Ramakrishnan","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Stony Brook University, Stony Brook, New York, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9716-7709","authenticated-orcid":false,"given":"Xiaojun","family":"Bi","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Stony Brook University, Stony Brook, New York, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,4,25]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"2023. SpeechRecognizer. https:\/\/developer.android.com\/reference\/android\/speech\/SpeechRecognizer [Online; Accessed: 2023-11-15]."},{"key":"e_1_3_3_2_3_2","unstructured":"2024. Elements of a Prompt. https:\/\/www.promptingguide.ai\/introduction\/elements. [Online; Accessed: 2024-09-12]."},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.20380\/GI2019.04"},{"key":"e_1_3_3_2_5_2","unstructured":"Rosana Ardila Megan Branson Kelly Davis Michael Henretty Michael Kohler Josh Meyer Reuben Morais Lindsay Saunders Francis\u00a0M Tyers and Gregor Weber. 2019. Common voice: A massively-multilingual speech corpus. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1912.06670 (2019)."},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/2858036.2858407"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/2470654.2466180"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/2556288.2557414"},{"key":"e_1_3_3_2_9_2","unstructured":"Paul\u00a0E. Black. 2021. Ratcliff\/Obershelp pattern recognition. in Dictionary of Algorithms and Data Structures Paul E. Black ed. 8 January 2021. Available from: https:\/\/www.nist.gov\/dads\/HTML\/ratcliffObershelp.html. [Online; Accessed: 2023-11-11]."},{"key":"e_1_3_3_2_10_2","unstructured":"Tom\u00a0B Brown. 2020. Language models are few-shot learners. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2005.14165 (2020)."},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3379337.3415857"},{"key":"e_1_3_3_2_12_2","unstructured":"Mark Davies. 2018. The corpus of contemporary American English: 1990-present. Google Scholar Google Scholar Reference (2018)."},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-017-2367-15"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1145\/3472749.3474795"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/502716.502753"},{"key":"e_1_3_3_2_16_2","unstructured":"Google. [n. d.]. LiteRT overview. https:\/\/ai.google.dev\/edge\/litert [Online; Accessed: 2024-12-05]."},{"key":"e_1_3_3_2_17_2","unstructured":"Google. 2021. Get started with Voice Access. https:\/\/support.google.com\/accessibility\/android\/answer\/6151848?hl=en. [Online; Accessed: 2021-07-18]."},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/1978942.1979308"},{"key":"e_1_3_3_2_19_2","unstructured":"iMore.com. 2021. Everything you can do with Voice Control on iPhone and iPad. https:\/\/www.imore.com\/everything-you-can-do-voice-control-iphone-and-ipad. [Online; Accessed: 2021-07-18]."},{"key":"e_1_3_3_2_20_2","unstructured":"Grammarly Inc.2020. Grammarly Keyboard. https:\/\/en.wikipedia.org\/wiki\/Grammarly [Online; accessed May-2020]."},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/1868914.1869004"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICMI.2002.1167020"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"crossref","unstructured":"Dhruv Kumar Vipul Raheja Alice Kaiser-Schatzlein Robyn Perry Apurva Joshi Justin Hugues-Nuger Samuel Lou and Navid Chowdhury. 2023. Speakerly: A Voice-based Writing Assistant for Text Composition. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.16251 (2023).","DOI":"10.18653\/v1\/2023.emnlp-industry.38"},{"key":"e_1_3_3_2_24_2","unstructured":"Abner Li. 2017. Google\u2019s speech recognition is now almost as accurate as humans. https:\/\/9to5google.com\/2017\/06\/01\/google-speech-recognition-humans\/ [Online; Accessed: 2023-11-15]."},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"crossref","unstructured":"Belinda\u00a0Z Li Jason Eisner Adam Pauls and Sam Thomson. 2023. Toward Interactive Dictation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.04008 (2023).","DOI":"10.18653\/v1\/2023.acl-long.854"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"crossref","unstructured":"Susan Lin Jeremy Warner JD Zamfirescu-Pereira Matthew\u00a0G Lee Sauhard Jain Michael\u00a0Xuelin Huang Piyawat Lertvittayakumjorn Shanqing Cai Shumin Zhai Bj\u00f6rn Hartmann et\u00a0al. 2024. Rambler: Supporting Writing With Speech via LLM-Assisted Gist Manipulation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.10838 (2024).","DOI":"10.1145\/3613904.3642217"},{"key":"e_1_3_3_2_27_2","unstructured":"Renjie Liu Yanxiang Zhang Yun Zhu Haicheng Sun Yuanbo Zhang Michael\u00a0Xuelin Huang Shanqing Cai Lei Meng and Shumin Zhai. 2024. Proofread: Fixes All Errors with One Tap. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.04523 (2024)."},{"key":"e_1_3_3_2_28_2","unstructured":"Google LLC.2020. Gboard. https:\/\/en.wikipedia.org\/wiki\/Gboard [Online; accessed May-2020]."},{"key":"e_1_3_3_2_29_2","unstructured":"Shayne Longpre Le Hou Tu Vu Albert Webson Hyung\u00a0Won Chung Yi Tay Denny Zhou Quoc\u00a0V Le Barret Zoph Jason Wei et\u00a0al. 2023. The flan collection: Designing data and methods for effective instruction tuning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2301.13688 (2023)."},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2011-236"},{"key":"e_1_3_3_2_31_2","unstructured":"OpenAI. 2024. Whisper-large-v3-turbo. Hugging Face Model Hub. Available at https:\/\/huggingface.co\/openai\/whisper-large-v3-turbo."},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","unstructured":"Sharon Oviatt and Philip Cohen. 2000. Perceptual User Interfaces: Multimodal Interfaces That Process What Comes Naturally. Commun. ACM 43 3 (March 2000) 45\u201353. 10.1145\/330534.330538","DOI":"10.1145\/330534.330538"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","unstructured":"Sharon Oviatt Phil Cohen Lizhong Wu John Vergo Lisbeth Duncan Bernhard Suhm Josh Bers Thomas Holzman Terry Winograd James Landay Jim Larson and David Ferro. 2000. Designing the User Interface for Multimodal Speech and Pen-Based Gesture Applications: State-of-the-Art Systems and Future Research Directions. Hum.-Comput. Interact. 15 4 (Dec. 2000) 263\u2013322. 10.1207\/S15327051HCI15041","DOI":"10.1207\/S15327051HCI15041"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.5555\/3019272"},{"key":"e_1_3_3_2_35_2","volume-title":"Proceedings of 21st International Conference on Human-Computer Interaction with Mobile Devices and Services (MobileHCI\u201919)","author":"Palin Kseniia","year":"2019","unstructured":"Kseniia Palin, Anna Feit, Sunjun Kim, Per\u00a0Ola Kristensson, and Antti Oulasvirta. 2019. How do People Type on Mobile Devices? Observations from a Study with 37,000 Volunteers.. In Proceedings of 21st International Conference on Human-Computer Interaction with Mobile Devices and Services (MobileHCI\u201919). ACM."},{"key":"e_1_3_3_2_36_2","unstructured":"PyTorch. [n. d.]. PyTorch Mobile End-to-end workflow from Training to Deployment for iOS and Android mobile devices. https:\/\/pytorch.org\/mobile\/home\/ [Online; Accessed: 2024-12-05]."},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2010.5700825"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","DOI":"10.1145\/2388676.2388793"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300433"},{"key":"e_1_3_3_2_40_2","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.1145\/2702123.2702135"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/1240624.1240727"},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","DOI":"10.1145\/2556288.2557412"},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"crossref","unstructured":"Thomas Wolf Lysandre Debut Victor Sanh Julien Chaumond Clement Delangue Anthony Moi Pierric Cistac Tim Rault R\u00e9mi Louf Morgan Funtowicz et\u00a0al. 2019. Huggingface\u2019s transformers: State-of-the-art natural language processing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1910.03771 (2019).","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3332165.3347924"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3472749.3474742"},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"publisher","DOI":"10.1145\/3490099.3511103"}],"event":{"name":"CHI 2025: CHI Conference on Human Factors in Computing Systems","location":"Yokohama Japan","acronym":"CHI '25","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2025 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706598.3713376","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3706598.3713376","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3706598.3713376","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,4]],"date-time":"2025-07-04T05:28:56Z","timestamp":1751606936000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706598.3713376"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,25]]},"references-count":46,"alternative-id":["10.1145\/3706598.3713376","10.1145\/3706598"],"URL":"https:\/\/doi.org\/10.1145\/3706598.3713376","relation":{},"subject":[],"published":{"date-parts":[[2025,4,25]]},"assertion":[{"value":"2025-04-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}